summaryrefslogtreecommitdiff
path: root/apps/gperf
diff options
context:
space:
mode:
authorlevine <levine@ae88bc3d-4319-0410-8dbf-d08b4c9d3795>1996-10-21 21:41:34 +0000
committerlevine <levine@ae88bc3d-4319-0410-8dbf-d08b4c9d3795>1996-10-21 21:41:34 +0000
commita5fdebc5f6375078ec1763850a4ca23ec7fe6458 (patch)
treebcf0a25c3d45a209a6e3ac37b233a4812f29c732 /apps/gperf
downloadATCD-a5fdebc5f6375078ec1763850a4ca23ec7fe6458.tar.gz
Initial revision
Diffstat (limited to 'apps/gperf')
-rw-r--r--apps/gperf/COPYING249
-rw-r--r--apps/gperf/ChangeLog1335
-rw-r--r--apps/gperf/Makefile25
-rw-r--r--apps/gperf/README24
-rw-r--r--apps/gperf/gperf.123
-rw-r--r--apps/gperf/gperf.info1127
-rw-r--r--apps/gperf/gperf.texi1184
-rw-r--r--apps/gperf/src/Bool_Array.cpp89
-rw-r--r--apps/gperf/src/Bool_Array.h65
-rw-r--r--apps/gperf/src/Gen_Perf.cpp345
-rw-r--r--apps/gperf/src/Gen_Perf.h65
-rw-r--r--apps/gperf/src/Hash_Table.cpp84
-rw-r--r--apps/gperf/src/Hash_Table.h50
-rw-r--r--apps/gperf/src/Iterator.cpp90
-rw-r--r--apps/gperf/src/Iterator.h67
-rw-r--r--apps/gperf/src/Key_List.cpp1345
-rw-r--r--apps/gperf/src/Key_List.h116
-rw-r--r--apps/gperf/src/List_Node.cpp110
-rw-r--r--apps/gperf/src/List_Node.h65
-rw-r--r--apps/gperf/src/Makefile155
-rw-r--r--apps/gperf/src/Options.cpp616
-rw-r--r--apps/gperf/src/Options.h140
-rw-r--r--apps/gperf/src/Vectors.cpp33
-rw-r--r--apps/gperf/src/Vectors.h44
-rw-r--r--apps/gperf/src/Version.cpp25
-rw-r--r--apps/gperf/src/gperf.cpp66
-rw-r--r--apps/gperf/src/new.cpp75
-rw-r--r--apps/gperf/tests/Makefile.in72
-rw-r--r--apps/gperf/tests/ada-pred.exp54
-rw-r--r--apps/gperf/tests/ada-res.exp63
-rw-r--r--apps/gperf/tests/ada.gperf63
-rw-r--r--apps/gperf/tests/adadefs.gperf54
-rw-r--r--apps/gperf/tests/c++.gperf47
-rw-r--r--apps/gperf/tests/c-parse.gperf56
-rw-r--r--apps/gperf/tests/c.exp32
-rw-r--r--apps/gperf/tests/c.gperf32
-rw-r--r--apps/gperf/tests/configure.in26
-rw-r--r--apps/gperf/tests/gpc.gperf48
-rw-r--r--apps/gperf/tests/gplus.gperf76
-rw-r--r--apps/gperf/tests/irc.gperf63
-rw-r--r--apps/gperf/tests/makeinfo.gperf116
-rw-r--r--apps/gperf/tests/modula.exp106
-rw-r--r--apps/gperf/tests/modula2.gperf40
-rw-r--r--apps/gperf/tests/modula3.gperf106
-rw-r--r--apps/gperf/tests/pascal.exp36
-rw-r--r--apps/gperf/tests/pascal.gperf36
-rw-r--r--apps/gperf/tests/test-1.exp140
-rw-r--r--apps/gperf/tests/test-2.exp183
-rw-r--r--apps/gperf/tests/test-3.exp169
-rw-r--r--apps/gperf/tests/test-4.exp138
-rw-r--r--apps/gperf/tests/test-5.exp111
-rw-r--r--apps/gperf/tests/test-6.exp74
-rw-r--r--apps/gperf/tests/test-7.exp32
-rw-r--r--apps/gperf/tests/test.c28
54 files changed, 9613 insertions, 0 deletions
diff --git a/apps/gperf/COPYING b/apps/gperf/COPYING
new file mode 100644
index 00000000000..9a170375811
--- /dev/null
+++ b/apps/gperf/COPYING
@@ -0,0 +1,249 @@
+
+ GNU GENERAL PUBLIC LICENSE
+ Version 1, February 1989
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+ Preamble
+
+ The license agreements of most software companies try to keep users
+at the mercy of those companies. By contrast, our General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. The
+General Public License applies to the Free Software Foundation's
+software and to any other program whose authors commit to using it.
+You can use it for your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Specifically, the General Public License is designed to make
+sure that you have the freedom to give away or sell copies of free
+software, that you receive source code or can get it if you want it,
+that you can change the software or use pieces of it in new free
+programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of a such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must tell them their rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ GNU GENERAL PUBLIC LICENSE
+ TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
+
+ 0. This License Agreement applies to any program or other work which
+contains a notice placed by the copyright holder saying it may be
+distributed under the terms of this General Public License. The
+"Program", below, refers to any such program or work, and a "work based
+on the Program" means either the Program or any work containing the
+Program or a portion of it, either verbatim or with modifications. Each
+licensee is addressed as "you".
+
+ 1. You may copy and distribute verbatim copies of the Program's source
+code as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice and
+disclaimer of warranty; keep intact all the notices that refer to this
+General Public License and to the absence of any warranty; and give any
+other recipients of the Program a copy of this General Public License
+along with the Program. You may charge a fee for the physical act of
+transferring a copy.
+
+ 2. You may modify your copy or copies of the Program or any portion of
+it, and copy and distribute such modifications under the terms of Paragraph
+1 above, provided that you also do the following:
+
+ a) cause the modified files to carry prominent notices stating that
+ you changed the files and the date of any change; and
+
+ b) cause the whole of any work that you distribute or publish, that
+ in whole or in part contains the Program or any part thereof, either
+ with or without modifications, to be licensed at no charge to all
+ third parties under the terms of this General Public License (except
+ that you may choose to grant warranty protection to some or all
+ third parties, at your option).
+
+ c) If the modified program normally reads commands interactively when
+ run, you must cause it, when started running for such interactive use
+ in the simplest and most usual way, to print or display an
+ announcement including an appropriate copyright notice and a notice
+ that there is no warranty (or else, saying that you provide a
+ warranty) and that users may redistribute the program under these
+ conditions, and telling the user how to view a copy of this General
+ Public License.
+
+ d) You may charge a fee for the physical act of transferring a
+ copy, and you may at your option offer warranty protection in
+ exchange for a fee.
+
+Mere aggregation of another independent work with the Program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other work under the scope of these terms.
+
+ 3. You may copy and distribute the Program (or a portion or derivative of
+it, under Paragraph 2) in object code or executable form under the terms of
+Paragraphs 1 and 2 above provided that you also do one of the following:
+
+ a) accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ b) accompany it with a written offer, valid for at least three
+ years, to give any third party free (except for a nominal charge
+ for the cost of distribution) a complete machine-readable copy of the
+ corresponding source code, to be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ c) accompany it with the information you received as to where the
+ corresponding source code may be obtained. (This alternative is
+ allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form alone.)
+
+Source code for a work means the preferred form of the work for making
+modifications to it. For an executable file, complete source code means
+all the source code for all modules it contains; but, as a special
+exception, it need not include source code for modules which are standard
+libraries that accompany the operating system on which the executable
+file runs, or for standard header files or definitions files that
+accompany that operating system.
+
+ 4. You may not copy, modify, sublicense, distribute or transfer the
+Program except as expressly provided under this General Public License.
+Any attempt otherwise to copy, modify, sublicense, distribute or transfer
+the Program is void, and will automatically terminate your rights to use
+the Program under this License. However, parties who have received
+copies, or rights to use copies, from you under this General Public
+License will not have their licenses terminated so long as such parties
+remain in full compliance.
+
+ 5. By copying, distributing or modifying the Program (or any work based
+on the Program) you indicate your acceptance of this license to do so,
+and all its terms and conditions.
+
+ 6. Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the original
+licensor to copy, distribute or modify the Program subject to these
+terms and conditions. You may not impose any further restrictions on the
+recipients' exercise of the rights granted herein.
+
+ 7. The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of the license which applies to it and "any
+later version", you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+the license, you may choose any version ever published by the Free Software
+Foundation.
+
+ 8. If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+ NO WARRANTY
+
+ 9. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+ 10. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
+OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
+TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
+YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
+PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+ Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to humanity, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+ To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively convey
+the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ <one line to give the program's name and a brief idea of what it does.>
+ Copyright (C) 19yy <name of author>
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19xx name of author
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items--whatever suits your
+program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a "copyright disclaimer" for the program, if
+necessary. Here a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ program `Gnomovision' (a program to direct compilers to make passes
+ at assemblers) written by James Hacker.
+
+ <signature of Ty Coon>, 1 April 1989
+ Ty Coon, President of Vice
+
+That's all there is to it!
diff --git a/apps/gperf/ChangeLog b/apps/gperf/ChangeLog
new file mode 100644
index 00000000000..d0e86c82103
--- /dev/null
+++ b/apps/gperf/ChangeLog
@@ -0,0 +1,1335 @@
+Sun Apr 14 14:31:10 1996 Douglas C. Schmidt (schmidt@tango.cs.wustl.edu)
+
+ * src: Changed things so that there's no longer any use of the
+ Read_Line and Std_Err code. All of this has been pushed into
+ the ACE components, which is where it belongs...
+
+ * src: Changed things so that there's no longer any use of the
+ pointless inheritance in the code. This was a result of my not
+ understanding inheritance back in 1989... ;-)
+
+ * Began to integrate GNU gperf into the ACE release. Started off
+ by bringing the standard GNU version up to date wrt the
+ changes I made back in 1991!
+
+Tue Oct 10 16:37:28 1995 Mike Stump <mrs@cygnus.com>
+
+ * src/new.cc: Since malloc/delete are not paired, we cannot call
+ free.
+
+Wed Jan 4 12:40:14 1995 Per Bothner <bothner@kalessin.cygnus.com>
+
+ * src/Makefile.in ($(TARGETPROG)): Link with $(LDFLAGS).
+ Patch from John Interrante <interran@uluru.stanford.edu>.
+
+Sat Nov 5 19:12:48 1994 Jason Merrill (jason@phydeaux.cygnus.com)
+
+ * src/Makefile.in (LIBS): Remove.
+
+Tue Oct 18 17:51:14 1994 Per Bothner <bothner@kalessin.cygnus.com>
+
+ * src/std-err.cc: Use strerror, instead of the non-standard
+ sys_nerr and sys_errlist.
+
+Sat Sep 17 22:02:13 1994 Per Bothner (bothner@kalessin.cygnus.com)
+
+ * src/key-list.cc (output_hash_function):
+ Patch from William Bader <wbader@CSEE.Lehigh.Edu>.
+
+Fri Jul 15 09:38:11 1994 Per Bothner (bothner@cygnus.com)
+
+ * src/std-err.cc: #include <errno.h>, and only declare
+ extern int errno if errno is not a macro.
+
+Mon May 30 17:29:34 1994 Per Bothner (bothner@kalessin.cygnus.com)
+
+ * Makefile.in (src_all, install): Make sure to add '/' after
+ `pwd` in $rootme, as expected by FLAGS_TO_PASS.
+
+Wed May 11 00:47:22 1994 Jason Merrill (jason@deneb.cygnus.com)
+
+ Make libg++ build with gcc -ansi -pedantic-errors
+ * src/options.h: Lose commas at end of enumerator lists.
+
+Sun Dec 5 19:16:40 1993 Brendan Kehoe (brendan@lisa.cygnus.com)
+
+ * src/hash-table.cc (Hash_Table::~Hash_Table): Don't pass an
+ argument to fprintf, since it's not expecting one.
+
+Fri Nov 26 19:03:18 1993 Per Bothner (bothner@kalessin.cygnus.com)
+
+ * src/list-node.cc: #undef index, for the sake of broken NeXT.
+
+Thu Nov 4 11:16:03 1993 Per Bothner (bothner@kalessin.cygnus.com)
+
+ * Makefile.in (install): Use INSTALL_DATA for gperf.1.
+
+Mon Oct 25 18:40:51 1993 Per Bothner (bothner@kalessin.cygnus.com)
+
+ * src/key-list.cc (Key_List::read_keys): Use POW macro
+ to increase hash table size to power of 2.
+
+ * options.h (LARGE_STACK_ARRAYS): New flag. Defaults to zero.
+ * gen-perf.cc, key-list.cc, read-line.cc:
+ Only stack-allocate large arrays if LARGE_STACK_ARRAYS is set.
+ * main.cc (main): Only call setrlimit (RLIMIT_STACK, ...)
+ if LARGE_STACK_ARRAYS.
+
+Mon Oct 4 17:45:08 1993 Per Bothner (bothner@kalessin.cygnus.com)
+
+ * src/gen-perf.cc: Always use ANSI rand/srand instead of BSDisms.
+
+Wed Aug 18 12:19:53 1993 Per Bothner (bothner@kalessin.cygnus.com)
+
+ * Makefile.in (src_all): Make less verbose output.
+
+Fri May 28 14:01:18 1993 Per Bothner (bothner@rtl.cygnus.com)
+
+ * src/gen-perf.cc (Gen_Perf::change): Don't use gcc-specific
+ 2-operand conditional expression.
+ * src/key-list.cc (Key_List::output_lookup_array):
+ Don't use variable-size stack arrays, unless compiled by g++.
+
+Tue May 4 14:08:44 1993 Per Bothner (bothner@cygnus.com)
+
+ Changes (mostly from Peter Schauer) to permit compilation
+ using cfront 3.0 and otherwise be ARM-conforming.
+ * src/key-list.h: class Key_List must use public derivation
+ of base class Std_Err (because Gen_Perf::operator() in gen-perf.cc
+ calls Std_Err::report_error).
+ * src/gen-perf.cc (Gen_Perf::affects_prev), src/hash-table.cc
+ (Hash_Table::operator()): Don't use gcc-specific 2-operand
+ conditional expression.
+ * src/iterator.cc (Iterator::operator()): Don't use gcc-specific
+ range construct in case label.
+ * key-list.cc (Key_List::output_lookup_array, Key_List::read_keys),
+ src/gen-perf.cc (Gen_Perf::operator()), src/read-line.cc
+ (Read_Line::readln_aux): If not gcc, don't allocate
+ variable-sized arrays on stack.
+ * src/new.cc (operator new): Argument type should be size_t.
+ * key-list.cc (Key_List::output_lookup_array, Key_List::read_keys),
+ new.cc (::operator new): Don't use non-standard >?= operator.
+
+Tue Apr 27 20:11:30 1993 Per Bothner (bothner@cygnus.com)
+
+ * src/Makefile.in: Define TARGETPROG, and use it.
+
+Mon Apr 19 00:29:18 1993 Per Bothner (bothner@cygnus.com)
+
+ * Makefile.in, configure.in: Re-vamped configure scheme.
+ * gperf.texinfo: Renamed to gperf.texi.
+ * src/bool-array.{h,cc}: ANSIfy bzero->memset.
+
+Sat Jan 30 20:21:28 1993 Brendan Kehoe (brendan@lisa.cygnus.com)
+
+ * tests/Makefile.in (mostlyclean): Also delete aout, cout, m3out,
+ pout, and preout.
+
+Tue Dec 29 08:58:17 1992 Ian Lance Taylor (ian@cygnus.com)
+
+ * Makefile.in: pass $(FLAGS_TO_PASS) to all calls to make.
+ (FLAGS_TO_PASS): added INSTALL, INSTALL_DATA, INSTALL_PROGRAM.
+
+Mon Dec 21 18:46:46 1992 Per Bothner (bothner@rtl.cygnus.com)
+
+ * tests/expected.* renamed to *.exp to fit in 14 chars.
+ * tests/Makefile.in: Update accordingly.
+ Also rename output.* to *.out.
+ * src/Makefile.in (clean): Remove gperf program.
+
+Wed Dec 9 14:33:34 1992 Per Bothner (bothner@cygnus.com)
+
+ * src/hash-table.cc, src/bool-array.h: ANSIfy bzero->memset.
+
+Thu Dec 3 19:34:12 1992 Per Bothner (bothner@cygnus.com)
+
+ * Makefile.in (distclean, realclean): Don't delete
+ Makefile before recursing.
+
+Fri Nov 6 13:41:49 1992 Per Bothner (bothner@rtl.cygnus.com)
+
+ * key-list.{h,cc}: Remove MAX_INT (and similar) constant
+ fields from Key_List class, and use INT_MAX (etc) from limits.h.
+ * key-list.{h,cc}, options.{h,cc}, vectors.h: Removed all
+ uses of initialized const fields, as they are non-standard
+ - and their use was easy to do away with. Mostly, just
+ made the constants static non-fields in the .cc file.
+
+Mon Nov 2 13:10:11 1992 Per Bothner (bothner@cygnus.com)
+
+ * tests/Makefile.in: When generating cinset.c, don't pass -C,
+ since -C assumes an ANSI compiler. Add the -C flag (with -a)
+ when generating test.out.3 instead.
+ * tests/expected.out.3: Update accordingly.
+
+Wed Aug 12 11:47:54 1992 Per Bothner (bothner@cygnus.com)
+
+ * Makefile.in: Factor out common flags into $(FLAGS_TO_PASS).
+ * Makefile.in: 'install-info' depends on gperf.info.
+
+Mon Aug 10 11:39:52 1992 Ian Lance Taylor (ian@dumbest.cygnus.com)
+
+ * Makefile.in, src/Makefile.in: always create installation
+ directories.
+
+Mon Jul 20 15:33:21 1992 Mike Stump (mrs@cygnus.com)
+
+ * src/new.cc (operator new): Add cast from void * to char *,
+ since it is not a standard conversion.
+
+Wed Jun 17 16:25:30 1992 Per Bothner (bothner@rtl.cygnus.com)
+
+ * src/gen-perf.cc: #include <_G_config.h> for _G_SYSV.
+ * src/key-list.cc: alloca() hair.
+ * src/main.cc (main): Only call getrlimit if _G_HAVE_SYS_RESOURCE.
+ * Makefile.in, {src,test}/Makefile.in: Fix *clean rules.
+
+Fri May 29 13:21:13 1992 Per Bothner (bothner@rtl.cygnus.com)
+
+ * src/gen-perf.cc: Replace USG -> _G_SYSV.
+
+Thu May 14 13:58:36 1992 Per Bothner (bothner@rtl.cygnus.com)
+
+ * src/Makefile.in: Don't pass obsolete flag -DUNLIMIT_STACK.
+ * tests/Makefile.in (clean): Fix.
+
+Sat Mar 7 00:03:56 1992 K. Richard Pixley (rich@rtl.cygnus.com)
+
+ * gperf.texinfo: added menu item hook.
+
+Wed Feb 26 18:04:40 1992 K. Richard Pixley (rich@cygnus.com)
+
+ * Makefile.in, configure.in: removed traces of namesubdir,
+ -subdirs, $(subdir), $(unsubdir), some rcs triggers. Forced
+ copyrights to '92, changed some from Cygnus to FSF.
+
+Sun Jan 26 19:21:58 1992 Per Bothner (bothner at cygnus.com)
+
+ * tests/Makefile.in: Use re-directed stdin instead of file
+ name in argv. This allows us to remove the filename
+ from the output, the expected output, and hence the diffs.
+ (Note that the input file is in $(srcdir), which we cannot
+ place in the expected out files.)
+ * tests/expected.out.[1235]: Edit out input filename,
+ to match new output.
+
+Mon Nov 4 15:04:41 1991 Douglas C. Schmidt (schmidt at bastille.ics.uci.edu)
+
+ * Need to do something about the end-of-line marker being
+ hard-coded to '\n'...
+
+ * Need to do something about the comment character being
+ hard-coded to '#'...
+
+Fri Sep 27 09:30:15 1991 Douglas C. Schmidt (schmidt at net4.ics.uci.edu)
+
+ * Fixed a stupid problem with printing out a local enum with the
+ -E option (I forgot to check for the case of 0 duplicates, so it
+ was saying 1 duplicate instead!).
+
+Mon Aug 19 00:39:40 1991 Douglas C. Schmidt (schmidt at javel.ics.uci.edu)
+
+ * Yow, all finished making gperf run with cfront/Saber C++. Not
+ really all that hard, actually, though did need to remove some
+ GNU specific hacks, like dynamically sized arrays and
+ initializing class data members in their declarations, etc.
+
+ * Bumped up the version # to reflect the recent changes.
+
+Sun Aug 18 22:25:32 1991 Douglas C. Schmidt (schmidt at javel.ics.uci.edu)
+
+ * Changed passage of Options::usage function in Options.C to have
+ a leading `&' so that Saber C++ wouldn't complain...
+
+ * Added a new header file called gperf.h that includes system-wide
+ info.
+
+ * Hacked up the release to work with Saber C++! Changed all *.cc
+ files to *.C.
+
+Mon Aug 5 21:18:47 1991 Douglas C. Schmidt (schmidt at net1.ics.uci.edu)
+
+ * Yow, hacked in the nifty changes to the Std_Err error handling
+ abstraction. This now adds format string support for printing
+ out signals and the name of the function when things go wrong.
+ Make changes throughout the source to make use of the new
+ facilities and also to make sure all previous uses of
+ Std_Err::report_error are now prefixed by the name of the class.
+
+Tue Jul 30 00:02:39 1991 Douglas C. Schmidt (schmidt at net4.ics.uci.edu)
+
+ * Make sure to add 1 to the Key_List::total_duplicates value when
+ printing it out since any time we have more than zero duplicates
+ we really have two or more duplicates!
+
+ * Added support for the -O (optimize option). This option
+ optimizes the generated lookup function by assuming that all
+ input keywords are members of the keyset from the keyfile.
+
+ * Added #define DUPLICATES and #define HASH_VALUE_RANGE (and of
+ course the enum equivalent) to the generated output. Don't know
+ how useful this will be, but it allows us to determine at a
+ glance whether we've got a minimal perfect hash function (in
+ which case TOTAL_KEYWORDS = HASH_VALUE_RANGE, and DUPLICATES =
+ 0).
+
+ * Fixed a small bug in the Key_List::output_keyword_table routine
+ that caused an extra newline to be printed if there were no
+ leading blank entries... (who cares, right?!)
+
+Mon Jul 29 22:05:40 1991 Douglas C. Schmidt (schmidt at net4.ics.uci.edu)
+
+ * Modified the handling of the -E (emit enums rather than
+ #defines) option in conjunction with the -G option. Now, if -G
+ and -E are given the enums are generated outside the lookup
+ function, rather than within it!
+
+ * Yow, as part of my Software Practice and Experience submission
+ writeup I realized I needed to make the # comment character work
+ correctly. Now if you put a backslash character ('\') in front
+ of the # it treats the first character as a #. Naturally, to
+ put a '\' character on the front of the line you need to escape
+ it also, i.e.,
+ \\I'm a line that starts with only one \
+ # I'm a comment line
+ \#define I'm walking a fine line... ;-)
+
+Wed Jun 26 11:21:02 1991 Douglas C. Schmidt (schmidt at bastille.ics.uci.edu)
+
+ * Changed all uses of the identifier `iteration_number' to
+ `generation_number' (also updated the paper!).
+
+Tue Apr 9 07:59:42 1991 Doug Schmidt (schmidt at net4.ics.uci.edu)
+
+ * Had to change a whole bunch of little thangs in key-list.cc and
+ list-node.cc to make the -I option work.
+
+ * Changed an alloca statement in key-list.cc to reflect the
+ strncasecmp modification (i.e., we now need to be able to
+ allocate a longer buffer if the -I option is used).
+
+Mon Apr 8 18:17:04 1991 Doug Schmidt (schmidt at net4.ics.uci.edu)
+
+ * Yucko, there was a bug in the handling of -c (and of course the
+ new -I command in key-list.cc). Apparently when I added the
+ super-duper hack that provided support for duplicate keys I
+ forgot to update the strcmp output...
+
+ * Boy, it has been a *long* time since I hacked this puppy. Let's
+ see, I'm about to add long-overdue support for case-insensitive
+ string comparisons to gperf's generated output code. We are
+ going to employ the hitherto unused option -I to indicate this!
+
+Thu Jun 28 16:17:27 1990 Doug Schmidt (schmidt at brilliant)
+
+ * Wow, first fix on the new job! There was a dumb error
+ in Key_List::output_lookup_function, where I printed the
+ string "&wordlist[key]" instead of the correct "&wordlist[index]".
+
+ * Added a couple of #ifdefs for USG support.
+
+Sun Jun 3 17:16:36 1990 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Updated the version number to 2.5 and sent to Doug Lea for release
+ with the latest GNU libg++.
+
+ * Changed the error handling when a keyword file cannot be opened
+ (now calls perror).
+
+Wed May 30 14:49:40 1990 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Instrumented the source code with trace statements automagically
+ inserted using my new automated trace instrumentation tool!
+
+Wed May 9 11:47:41 1990 Doug Schmidt (schmidt at siam.ics.uci.edu)
+
+ * Really fixed the previous bug. Turns out that a small amount
+ of logic had to be duplicated to handle static links that occur
+ as part of dynamic link chains. What a pain!!!
+
+Tue May 8 23:11:44 1990 Doug Schmidt (schmidt at siam.ics.uci.edu)
+
+ * Fixed a stupid bug in Key_List::output_lookup_array that was
+ causing incorrect counts to be generated when there were both
+ static and dynamic links occurring for the same hash value.
+ Also simplified the code that performs the logic in this routine.
+
+Mon Apr 30 17:37:24 1990 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Fixed stupid bug in Key_List::output_lookup_array that was
+ making the generated lookup[] array contain `chars' even
+ when the values stored in the chars are greater than 127!
+
+ * Changed the behavior of the -G (global table) option so that it
+ will output the `length[]' array in the global scope along with
+ the `word_list[]' array.
+
+ * Fixed a stupid bug in Key_List::output_lookup_function that
+ would always output the complicated `duplicate-handling' lookup
+ logic, even when there were no duplicates in the input!
+
+ * Yikes, had to modify a bunch of stuff in key-list.cc to correctly
+ handle duplicate entries. Changed the generated code so that
+ the MIN_HASH_VALUE is no longer subtracted off when calculating
+ the hash value for a keyword. This required changing some other
+ code by substituting MAX_HASH_VALUE for TOTAL_KEYS in several places.
+ Finally, this means that the generated tables may contain leading
+ null entries, but I suppose it is better to trade-off space to get
+ faster performance...
+
+Mon Mar 26 13:08:43 1990 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Updated version number to 2.4 to reflect the latest changes.
+
+ * Changed the main program so that it always prints out gperf's
+ execution timings to the generated output file.
+
+Sun Mar 25 12:39:30 1990 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Added the -Z option so that users can specify the name of the
+ generated class explicitly. Updated documentation to reflect
+ this change.
+
+ * Modified the generated C++ class interface so that the functions
+ are declared static (to remove the overhead of passing the `this'
+ pointer). This means that operator()() can no longer be used,
+ since it only works on non-static member functions.
+ Also changed things so that there is no constructor (why waste
+ the extra call, when it doesn't do anything, eh?)
+
+ * Modified the behavior of Key_List::output when the -L C++ option
+ is enabled. Previously the generated code used const data members
+ to record MIN_WORD_LENGTH, MIN_HASH_VALUE, etc. However, as
+ pointed out by James Clark this may result in suboptimal behavior
+ on the part of C++ compilers that can't inline these values.
+ Therefore, the new behavior is identical to what happens with
+ -L C, i.e., either #defines or function-specific enums are used.
+ Why sacrifice speed for some abstract notion of `code purity?' ;-)
+
+Tue Mar 6 18:17:42 1990 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Added the -E option that defines constant values using an enum
+ local to the lookup function rather than with #defines. This
+ also means that different lookup functions can reside in the
+ same file. Thanks to James Clark (jjc@ai.mit.edu).
+
+Sat Mar 3 20:19:00 1990 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Added a special case to Key_List::output_switch that doesn't
+ generate extra comparisons when the `-S' is given an argument
+ of 1 (the normal case). This should speed up the generated
+ code output a tad...
+
+Fri Feb 23 14:21:28 1990 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Renamed all instances of member function get_keysig_size
+ to get_max_keysig_size, since this is more precise...
+
+ * Changed all occurrences of charset to keysig (stands for ``key
+ signature'') to reflect the new naming convention used in the
+ USENIX paper.
+
+Thu Feb 22 11:28:36 1990 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Changed the name of the generated associated values table from
+ asso_value to asso_values to reflect conventions in the USENIX
+ C++ paper.
+
+Thu Feb 15 23:29:03 1990 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Updated the gperf.texinfo file to fix some formatting problems
+ that had crept in since last time.
+
+Wed Feb 14 23:27:24 1990 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Fixed stupid bug in key-list.cc (get_special_input), where
+ gperf replaced each '%' with the succeeding character.
+
+ * Added support for multiple target language generation. Currently
+ handled languages are C and C++, with C as the default. Updated
+ documentation and option handler to reflect the changes.
+
+ * Added a global destructor to new.cc and removed the #ifdef, since
+ the bloody thing now works with libg++.
+
+Mon Feb 14 13:00:00 1990 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Found out that my gperf paper was accepted at the upcoming
+ USENIX C++ Conference in San Francisco. Yow!
+
+Tue Jan 30 09:00:29 1990 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * #ifdef'd out the new.cc memory allocator, since there are
+ problems with this and the libg++ stuff.
+
+ * Changed key-list.h so that class Vectors is a public (rather
+ than private) base class for class Key_List. The previous
+ form was illegal C++, but wasn't being caught by the old
+ g++ compiler. Should work now... ;-)
+
+Sun Dec 10 14:08:23 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added several changes from rfg@ics.uci.edu. These changes
+ help to automate the build process.
+
+Wed Nov 15 15:49:33 1989 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Removed conditional compilation for GATHER_STATISTICS. There's
+ really no good reason to avoid collecting this info at run-time,
+ since that section of code is *hardly* the bottleneck... ;-)
+
+ * Simplified the C output routines in Key_List::set_output_types
+ and Key_List::output_keyword_table a bit in order to
+ speed up and clean up the code generation.
+
+ * Modified function Key_List::get_special_input so that it does
+ not try to `delete' a buffer that turned out to be too short.
+ This is important since the new memory management scheme
+ does not handle deletions. However, adding a small amount of
+ garbage won't hurt anything, since we generally don't do this
+ operation more than a couple times *at most*!
+
+ * Created a new file (new.cc) which includes my own overloaded
+ operator new. This function should dramatically reduce the
+ number of calls to malloc since it grabs large chunks and
+ doles them out in small pieces. As a result of this change
+ the class-specific `operator new' was removed from class List_Node.
+
+Tue Nov 14 21:45:30 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Continued to refine the great hack. The latest trick is to
+ try and replace most uses of dynamic memory (i.e., calls to
+ new) with uses of gcc dynamic arrays (i.e., an alloca solution).
+ This makes life much easier for the overall process-size, since
+ it reduces the amount of overhead for memory management. As a
+ side-effect from this change there is no reason to have the
+ Bool_Array::dispose member function, so it's outta here!
+
+ * Fixed a stupid bug that was a disaster waiting to happen...
+ Instead of making the boolean array large enough to index
+ max_hash_value it was only large enough to index max_hash_value
+ - 1. Once again, an off-by-one mistake in C/C++!!!!
+
+Mon Nov 13 19:38:27 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added the final great hack! This allows us to generate hash tables
+ for near-perfect hash functions that contain duplicates, *without*
+ having to use switch statements! Since many compilers die on large
+ switch statements this feature is essential. Furthermore, it appears
+ that the generated code is often *smaller* than that put out by
+ compilers, even though a large, sparse array must be created.
+ Here's the general idea:
+
+ a. Generate the wordlist as a contiguous block of keywords,
+ just as before when using a switch statement. This
+ wordlist *must* be sorted by hash value.
+
+ b. Generate the lookup array, which is an array of signed
+ {chars,shorts,ints}, (whichever allows full coverage of
+ the wordlist dimensions). If the value v, where v =
+ lookup[hash(str,len)], is >= 0 and < TOTAL_KEYWORDS, then we
+ simply use this result as a direct access into the wordlist
+ array to snag the keyword for comparison.
+
+ c. Otherwise, if v is < -TOTAL_KEYWORDS or > TOTAL_KEYWORDS
+ this is an indication that we'll need to search through
+ some number of duplicate hash values. Using a hash
+ linking scheme we'd then index into a different part of
+ the hash table that provides the starting index and total
+ length of the duplicate entries to find via linear search!
+
+Sun Nov 12 13:48:10 1989 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Simplified Key_List::output_min_max considerably by recognizing
+ that since the keyword list was already sorted by hash value finding
+ the min and max values is trivial!
+
+ * Improved the debugging diagnostics considerably in classes Key_List,
+ Hash_Table, and Gen_Perf.
+
+ * Modified the `-s' option so that a negative argument is now
+ interpreted to mean `allow the maximum associated value to be
+ about x times *smaller* than the number of input keys.' This
+ should help prevent massive explosion of generated hash table
+ size for large keysets.
+
+Sat Nov 11 11:31:13 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added a field in class Key_List that counts the total number
+ of duplicate keywords, both static and dynamic.
+
+ * Added a new member function Bool_Array that deletes the dynamic
+ memory allocated to Bool_Array::storage_array. This space may
+ be needed for subsequent options, so it made sense to free it as
+ soon as possible...
+
+ * Renamed file/class Alpha_Vectors to Vectors, to avoid problems
+ with 14 character length filenames on SYSV. Also changed file
+ adapredefined.gperf to adadefs.gperf in the ./tests directory.
+
+ * Modified class Options by changing the member function
+ Options::total_positions to Options::get_charset_size and
+ Options::set_charset_size. These two routines now either return
+ the total charset size *or* the length of the largest keyword
+ if the user specifies the -k'*' (ALLCHARS) option. This change
+ cleans up client code.
+
+ * Merged all the cperf changes into gperf.
+
+ * Made sure to explicitly initialize perfect.fewest_collisions to
+ 0.
+
+ * Cleaned up some loose ends noticed by Nels Olson.
+ 1. Removed `if (collisions <= perfect.fewest_collisions)'
+ from Gen_Perf::affects_prev since it was superfluous.
+ 2. Removed the fields best_char_value and best_asso_value
+ from Gen_Perf. These were also unnecessary.
+ 3. Fixed a braino in the Bool_Array::bool_array_reset
+ function. Since iteration numbers can never be zero
+ the `if (bool_array.iteration_number++ == 0)' must be
+ `if (++bool_array.iteration_number == 0).'
+ 4. Modified Std_Err::report_error so that it correctly handles
+ "%%".
+
+ * It is important to note that -D no longer enables -S.
+ There is a good reason for this change, which will become
+ manifested in the next release... (suspense!).
+
+ * Made some subtle changes to Key_List::print_switch so that it finally
+ seems to work correctly. Needs more stress testing, however...
+
+ * Made a major change to the Key_List::print_switch function.
+ The user can now specify the number of switch statements to generate
+ via an argument to the -S option, i.e., -S1 means `generate 1
+ switch statement with all keywords in it,' -S2 means generate
+ 2 switch statements with 1/2 the elements in each one, etc.
+ Hopefully this will fix the problem with C compilers not being
+ able to generate code for giant switch statements (but don't
+ hold your breath!)
+
+ * Changed Key_List::length function to Key_List::keyword_list_length.
+
+ * Added a feature to main.c that prints out the starting wall-clock
+ time before the program begins and prints out the ending wall-clock
+ time when the program is finished.
+
+ * Added the GATHER_STATISTICS code in hash-table.c so we can
+ keep track of how well double hashing is doing. Eventually,
+ GATHER_STATISTICS will be added so that all instrumentation
+ code can be conditionally compiled in.
+
+ * Fixed a stupid bug in Key_List::print_switch routine. This
+ was necessary to make sure the generated switch statement worked
+ correctly when *both* `natural,' i.e., static links and dynamic
+ links, i.e., unresolved duplicates, hash to the same value.
+
+ * Modified Bool_Array::~Bool_Array destructor so that
+ it now frees the bool_array.storage_array when it is no longer
+ needed. Since this array is generally very large it makes sense
+ to return the memory to the freelist when it is no longer in use.
+
+ * Changed the interface to constructor Hash_Table::Hash_Table. This
+ constructor is now passed a pointer to a power-of-two sized buffer that
+ serves as storage for the hash table. Although this weakens information
+ hiding a little bit it greatly reduces dynamic memory fragmentation,
+ since we can now obtain the memory via a call to alloca, rather
+ than malloc. This change modified Key_List::read_keys calling
+ interface.
+
+ * Since alloca is now being used more aggressively a conditional
+ compilation section was added in main.c. Taken from GNU GCC,
+ this code gets rid of any avoidable limit on stack size so that
+ alloca does not fail. It is only used if the -DRLIMIT_STACK
+ symbol is defined when gperf is compiled.
+
+ * Added warnings in option.c so that users would be informed
+ that -r supersedes -i on the command-line.
+
+ * Rewrote Gen_Perf::affects_prev. First, the code structure
+ was cleaned up considerably (removing the need for a dreaded
+ goto!). Secondly, a major change occurred so that Gen_Perf::affects_prev
+ returns FALSE (success) when fewest_hits gets down to whatever
+ it was after inserting the previous key (instead of waiting for
+ it to reach 0). In other words, it stops trying if it can
+ resolve the new collisions added by a key, even if there are
+ still other old, unresolved collisions. This modification was
+ suggested by Nels Olson and seems to *greatly* increase the
+ speed of gperf for large keyfiles. Thanks Nels!
+
+ * In a similar vein, inside the Gen_Perf::change routine
+ the variable `perfect.fewest_collisions' is no longer initialized
+ with the length of the keyword list. Instead it starts out at
+ 0 and is incremented by 1 every time change () is called.
+ The rationale for this behavior is that there are times when a
+ collision causes the number of duplicates (collisions) to
+ increase by a large amount when it would presumably just have
+ gone up by 1 if none of the asso_values were changed. That is,
+ at the beginning of change(), you could initialize fewest_hits
+ to 1+(previous value of fewest_hits) instead of to the number of
+ keys. Thanks again, Nels.
+
+ * Replaced alloca with new in the Gen_Perf::change function.
+ This should eliminate some overhead at the expense of a little
+ extra memory that is never reclaimed.
+
+ * Renamed Gen_Perf::merge_sets to Gen_Perf::compute_disjoint_union
+ to reflect the change in behavior.
+
+ * Added the -e option so users can supply a string containing
+ the characters used to separate keywords from their attributes.
+ The default behavior is ",\n".
+
+ * Removed the char *uniq_set field from LIST_NODE and modified
+ uses of uniq_set in perfect.c and keylist.c. Due to changes
+ to Gen_Perf::compute_disjoint_union this field was no longer
+ necessary, and its removal makes the program smaller and
+ potentially faster.
+
+ * Added lots of changes/fixes suggested by Nels Olson
+ (umls.UUCP!olson@mis.ucsf.edu). In particular:
+ 1. Changed Bool_Array so that it would dynamically create
+ an array of unsigned shorts rather than ints if the
+ LO_CAL symbol was defined during program compilation.
+ This cuts the amount of dynamic memory usage in half,
+ which is important for large keyfile input.
+ 2. Added some additional debugging statements that print extra
+ info to stderr when the -d option is enabled.
+ 3. Fixed a really stupid bug in Key_List::print_switch
+ A right paren was placed at the wrong location, which broke
+ strlen ().
+ 4. Fixed a subtle problem with printing case values when keylinks
+ appear. The logic failed to account for the fact that there
+ can be keylinks *and* regular node info also!
+ 5. Changed the behavior of Key_List::read_keys so that it would
+ honor -D unequivocally, i.e., it doesn't try to turn off dup
+ handling if the user requests it, even if there are no
+ immediate links in the keyfile input.
+ 6. Modified the -j option so that -j 0 means `try random values
+ when searching for a way to resolve collisions.'
+ 7. Added a field `num_done' to the Gen_Perf struct. This is used
+ to report information collected when trying to resolve
+ hash collisions.
+ 8. Modified the merge_sets algorithm to perform a disjoint
+ union of two multisets. This ensures that subsequent
+ processing in Gen_Perf::affects_prev doesn't
+ waste time trying to change an associated value that is
+ shared between two conflicting keywords.
+ 9. Modified Gen_Perf::affects_prev so that it doesn't try
+ random jump values unless the -j 0 option is enabled.
+ 10. Fixed a silly bug in Gen_Perf::change. This problem caused
+ gperf to seg fault when the -k* option was given and the
+ keyfile had long keywords.
+
+Sun Oct 29 00:18:55 1989 Doug Schmidt (schmidt at siam.ics.uci.edu)
+
+ * Modified class-specific new operations for Read_Line and
+ List_Node so they don't fail if SIZE is larger than twice
+ the previous buffer size. Note we double buffer size
+ every time the previous buffer runs out, as a heuristic
+ to reduce future calls to malloc.
+
+Sun Oct 22 13:49:43 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Updated gperf version number to 2.0. Sent to Doug Lea for
+ incorporation into the long-awaited `official' libg++ 1.36
+ release!
+
+ * Thanks to Nels Olson a silly bug in Gen_Perf::change ()
+ was fixed. This problem caused gperf to seg fault when
+ the -k* option was given and the keyfile had long
+ keywords.
+
+ * Modified Key_List::print_hash_function so that it output
+ max_hash_value + 1 (rather than just max_hash_value) for
+ any associated value entries that don't correspond to
+ keyword charset characters. This should speed up rejection
+ of non-keyword strings a little in some cases.
+
+Sat Oct 21 19:28:36 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Fixed Key_List::print_hash_function so that it no longer output
+ things like `return 0 + ...' Although this probably gets
+ optimized away by even the worst C compilers there isn't any
+ point tempting fate... ;-)
+
+ * Fixed class List_Node's constructor so that it wouldn't a priori
+ refuse to consider trying to hash keys whose length is less
+ than the smallest user-specified key position. It turns out
+ this is not a problem unless the user also specifies the -n
+ (NOLENGTH) option, in which case such keys most likely
+ don't have a prayer of being hashed correctly!
+
+ * Changed the name of the generated lookup table from `Hash_Table'
+ to `asso_value' to be consistent with the gperf paper.
+
+Tue Oct 17 14:19:48 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added a flag GATHER_STATISTICS in the Makefile. If defined
+ during compilation this turns on certain collection facilities
+ that track the performance of gperf during its execution. In
+ particular, I want to see how many collisions occur for the
+ double hashing Hash_Table.
+
+ * Added a safety check so that we don't screw up if the total
+ number of `resets' of the Bool_Array exceeds MAX_INT. Since
+ this number is around 2^31 it is unlikely that this would ever
+ occur for most input, but why take the risk?
+
+ * Changed the behavior for the -a (ANSI) option so that the
+ generated prototypes use int rather than size_t for the LEN
+ parameter. It was too ugly having to #include <stddef.h> all
+ over the place...
+
+Mon Oct 16 11:00:35 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Continued to work on the gperf paper for the USENIX C++
+ conference. At some point this will be merged back into
+ the gperf documentation...
+
+Sat Oct 14 20:29:43 1989 Doug Schmidt (schmidt at siam.ics.uci.edu)
+
+ * Added a majorly neat hack to Bool_Array, suggested by rfg.
+ The basic idea was to throw away the Ullman array technique.
+ The Ullman array was used to remove the need to reinitialize all
+ the Bool_Array elements to zero every time we needed to determine
+ whether there were duplicate hash values in the keyword list.
+ The current trick uses an `iteration number' scheme, which takes
+ about 1/3 the space and reduces the overall program running
+ time by about 20 percent for large input! The hack works as
+ follows:
+
+ 1. Dynamically allocate 1 boolean array of size k.
+ 2. Initialize the boolean array to zeros, and consider the first
+ iteration to be iteration 1.
+ 3. Then on all subsequent iterations we `reset' the bool array by
+ kicking the iteration count by 1.
+ 4. When it comes time to check whether a hash value is currently
+ in the boolean array we simply check its index location. If
+ the value stored there is *not* equal to the current iteration
+ number then the item is clearly *not* in the set. In that
+ case we assign the iteration number to that array's index
+ location for future reference. Otherwise, if the item at
+ the index location *is* equal to the iteration number we've
+ found a duplicate. No muss, no fuss!
+
+Mon Oct 2 12:30:54 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Changed some consts in options.h to enumerals, since g++
+ doesn't seem to like them at the moment!
+
+Sat Sep 30 12:55:24 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Fixed a stupid bug in Key_List::print_hash_function that manifested
+ itself if the `-k$' option was given (i.e., only use the key[length]
+ character in the hash function).
+
+ * Added support for the -C option. This makes the contents of
+ all generated tables `readonly'.
+
+ * Changed the handling of generated switches so that there is
+ only one call to str[n]?cmp. This *greatly* reduces the size of
+ the generated assembly code on all compilers I've seen.
+
+ * Fixed a subtle bug that occurred when the -l and -S options
+ were given. Code produced looked something like:
+
+ if (len != key_len || !strcmp (s1, resword->name)) return resword;
+
+ which doesn't make any sense. Clearly, this should be:
+
+ if (len == key_len && !strcmp (s1, resword->name)) return resword;
+
+Tue Sep 26 10:36:50 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Changed class Read_Line's definition so that it no longer
+ needs to know about the buffering scheme used to speed up
+ dynamic memory allocation of input keywords and their
+ associated attributes. This means that operator new is no longer
+ a friend of Read_Line.
+
+Mon Sep 25 23:17:10 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Decided that Obstacks had too much overhead, so they were
+ removed in favor of super-efficient, low-overhead buffered
+ storage allocation hacks in Read_Line and List_Node.
+
+ * No longer try to inline functions that g++ complains about
+ (Key_List::Merge and Key_List::Merge_Sort).
+
+Sun Sep 24 13:11:24 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Changed classes Read_Line and List_Node to use Obstacks in order
+ to cache memory allocation for keyword strings and List_Nodes.
+
+ * Continued to experiment with inheritance schemes.
+
+ * Added a new file `alpha.h', that declares static data shared
+ (i.e., inherited) between classes List_Node and Key_List.
+
+Tue Sep 12 16:14:41 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Made numerous changes to incorporate multiple inheritance in
+ gperf.
+
+Wed Aug 16 23:04:08 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added the -DCOMPILER_FIXED flag to the ./src/Makefile. This
+ implies that people trying to compile gperf need to have a
+ working version of the new g++ compiler (1.36.0).
+
+ * Removed some extra spaces that were being added in the generated
+ C code.
+
+Mon Jul 24 17:09:46 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Fixed PRINT_HASH_FUNCTION and PRINT_LOOKUP_FUNCTION in keylist.c
+ so that the generated functions take an unsigned int length argument.
+ If -a is enabled the prototype is (const char *str, size_t len).
+
+Fri Jul 21 13:06:15 1989 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Fixed a typo in PRINT_KEYWORD_TABLE in keylist.cc that prevented
+ the indentation from working correctly.
+
+ * Fixed a horrible typo in PRINT_KEYWORD_TABLE in keylist.cc
+ that prevented links from being printed correctly.
+
+Tue Jul 18 16:04:31 1989 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Fixed up readline.cc and readline.h so that they work OK
+ with g++ compilers that aren't completely up-to-date.
+ If symbol COMPILER_FIXED is defined then the behavior
+ that works on my more recent version of g++ is enabled.
+
+Sun Jul 9 17:53:28 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Changed the ./tests subdirectory Makefile so that it
+ uses $(CC) instead of gcc.
+
+Sun Jul 2 21:52:15 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Fixed a number of subtle bugs that occurred when -S was
+ combined with various and sundry options.
+
+ * Added the -G option, that makes the generated keyword table
+ a global static variable, rather than hiding it inside
+ the lookup function. This allows other functions to directly
+ access the contents in this table.
+
+ * Added the "#" feature, that allows comments inside the keyword
+ list from the input file. Comment handling takes place in readline.c.
+ This simplifies the code and reduces the number of malloc calls.
+
+ * Also added the -H option (user can give the name of the hash
+ function) and the -T option (prevents the transfer of the type decl
+ to the output file, which is useful if the type is already defined
+ elsewhere).
+
+Thu Jun 22 20:39:39 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Modified many classes so that they would inherit Std_Err as
+ a base class. This makes things more abstract...
+
+Fri Jun 16 14:23:00 1989 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Modified the -f (FAST) option. This now takes an argument.
+ The argument corresponds to the number of iterations used
+ to resolve collisions. -f 0 uses the length of the
+ keyword list (which is what -f did before). This makes
+ life much easier when dealing with large keyword files.
+
+Tue Jun 6 17:53:27 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added the -c (comparison) option. Enabling this
+ will use the strncmp function for string comparisons.
+ The default is to use strcmp.
+
+ * Fixed a typo in key_list.cc (PRINT_SWITCH). This caused
+ faulty C code to be generated when the -D, -p, and -t
+ options were all enabled.
+
+Thu May 25 14:07:21 1989 Doug Schmidt (schmidt at siam.ics.uci.edu)
+
+ * Once again, changed class Read_Line to overload global operator
+ new. Hopefully, this will work...!
+
+Sun May 21 01:51:45 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Modified Key_List::print_hash_function () so that it properly
+ formats the associated values in the hash table according to
+ the maximum number of digits required to represent the largest
+ value.
+
+ * Removed the named return value from class Hash_Table's
+ operator (), since this causes a seg fault when -O is enabled.
+ No sense tripping subtle g++ bugs if we don't have to.... ;-)
+
+ * Removed the operator new hack from Read_Line, since this seemed
+ to create horrible bus error problems.
+
+ * Changed many class member functions and data members to be `static',
+ if they don't manipulate this!
+
+Fri May 12 23:06:56 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Changed class Std_Err to use static member functions, a la
+ Ada or Modula 2. This eliminates the need for an explicit
+ error-handler class object.
+
+ * Added the ``named return value'' feature to Hash_Table::operator ()
+ and Bool_Array::operator [], just for the heck of it.... ;-)
+
+ * Changed the previous hack in Read_Line so that we now use
+ the overloaded global `new' instead of NEW_STRING!
+
+Wed May 3 17:36:55 1989 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Updated to version 1.7. This reflects the recent major changes
+ and the new C port.
+
+ * Modified the GNU getopt.cc routine to have a class-based interface.
+
+ * Fixed a typo in Perfect.cc ~Perfect that prevented the actual maximum
+ hash table size from being printed (maybe the stream classes
+ weren't so bad after all.... ;-).
+
+ * Added support for the -f option. This generates the perfect
+ hash function ``fast.'' It reduces the execution time of
+ gperf, at the cost of minimizing the range of hash values.
+
+Tue May 2 16:23:29 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Added an efficiency hack to Read_Line. Instead of making
+ a call to operator NEW (a.k.a. malloc) for each input string
+ a new member function NEW_STRING stores a large buffer from
+ which new strings are carved out, growing the buffer if
+ necessary. It might be useful to add this throughout the
+ program....
+
+ * Removed all unnecessary calls to DELETE. If the program is about
+ to exit it is silly to waste time freeing memory.
+
+ * Added the GNU getopt program to the distribution. This makes
+ GPERF portable to systems that don't include getopt in libc.
+
+ * Added a strcspn member to class Key_List. This also increases
+ portability.
+
+ * Added the get_include_src function from keylist.c as a member
+ function in class Key_List. Hopefully every function is
+ now associated with a class. This aids abstraction and
+ modularity.
+
+ * Ported gperf to C. From now on both K&R C and GNU G++ versions
+ will be supported. There will be two ChangeLog files, one
+ for each version of the program.
+
+Mon May 1 16:41:45 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Fixed a bug with -k'*'. This now prints out *all* the cases
+ up to the length of the longest word in the keyword set.
+
+Sun Apr 30 12:15:25 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Removed all use of the stream classes. Too ugly, slow, and
+ not handled by the c++-mode formatter....
+
+ * Modified the handling of links (i.e., keywords that have
+ identical hash values as other keywords). This should
+ speed up hash function generation for keyword sets with
+ many duplicate entries. The trick is to treat duplicate
+ values as equivalence classes, so that each set of duplicate
+ values is represented only once in the main list processing.
+
+ * Fixed some capitalization typos and indentation mistakes in
+ Key_List::print_hash_function.
+
+Sat Apr 29 12:04:03 1989 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Fixed a typo/logico in Key_List::print_switch that prevented
+ the last keyword in the keyword list from being printed out. This
+ requires further examination.....
+
+ * Fixed a stupid bug in List_Node::List_Node. If the -k'*' option
+ was enabled the KEY_SET string wasn't getting terminated with
+ '\0'!
+
+Fri Apr 28 12:38:35 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Renamed strexp.h and strexp.cc to iterator.h and iterator.cc.
+ Also changed the strexp class to iterator. Continued to work
+ on style...
+
+ * Updated the version number to 1.6. This reflects all the
+ recent changes.
+
+Thu Apr 27 00:14:51 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added the -D option that properly handles keyword sets that
+ contain duplicate hash values.
+
+ * Continued the stylistic changes. Added the #pragma once
+ directive to all the *.h files. Removed all #defines and
+ replaced them with static consts. Also moved the key_sort
+ routine from options.cc into the options class as a
+ member function.
+
+Mon Apr 3 13:26:55 1989 Doug Schmidt (schmidt at zola.ics.uci.edu)
+
+ * Made massive stylistic changes to bring source code into
+ conformance with GNU style guidelines.
+
+Thu Mar 30 23:28:45 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Fixed up the output routines so that they generate code
+ corresponding to the GNU style guidelines.
+
+Sat Mar 11 13:12:37 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Fixed Stderr constructors so that they wouldn't try to
+ use the base class initializer syntax for the static
+ class variable Program_Name. G++ 1.34 is stricter in
+ enforcing the rules!
+
+Fri Mar 10 11:24:14 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Removed -v and ``| more'' from the Makefile to keep rfg happy...
+
+Thu Mar 2 12:37:30 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Sent latest GNU gperf version 1.5 to Doug Lea for inclusion
+ into libg++ 1.34. Note that there is a small bug with
+ the new %{ ... %} source inclusion facility, since it doesn't
+ understand comments and will barf if %{ or %} appear nested
+ inside the outermost delimiters. This is too trivial of
+ a defect to fix at the moment...
+
+Tue Feb 28 11:19:58 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added the -K option, which allows the user to provide an
+ alternative name for the keyword structure component.
+ The default is still ``name.''
+
+ * Added the LEX and YACC-like ability to include arbitrary
+ text at the beginning of the generated C source code output.
+ This required two new functions Get_Special_Input,
+ Key_List::Save_Include_Src;
+
+ * Fixed memory allocation bug in Key_List::Set_Types.
+ Variable Return_Type needs 1 additional location
+ to store the "*" if the -p option is used.
+
+ * Added code to NULL terminate both Struct_Tag and Return_Type,
+ *after* the strncpy (stupid mistake).
+
+Mon Feb 27 14:39:51 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added a new option -N. This allows the user to specify the
+ name to be used for the generated lookup function. The
+ default name is still ``in_word_set.'' This makes it
+ possible to completely automate the perfect hash function
+ generation process!
+
+Mon Feb 20 23:33:14 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Corrected the Hash_Table::operator () function so that
+ *it* is responsible for deciding when a new key has the
+ same signature as a previously seen key. The key length
+ information is now used internally to this function to
+ decide whether to add to the hash table those keys with
+ the same key sets, but different lengths. Before, this
+ was handled by the Key_List::Read_Keys function. However,
+ this failed to work for certain duplicate keys, since
+ they weren't being entered into the hash table properly.
+
+Sun Feb 19 16:02:51 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Modified class Options by moving the enum Option_Type out
+ of the class. This is to satisfy the new enumeration
+ scope rules in C++.
+
+Sun Jan 15 15:12:09 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Incremented the version number up to 1.4 to reflect the new
+ options that affect the generated code. Send the new
+ distribution off to Michael for use with g++ 1.33.
+
+ * Added a fix to Key_List::Read_Keys so that it checks for links
+ properly when the -n option is used. Previously, it didn't
+ catch obvious links, which caused it to spend large amounts
+ of time searching for a solution that could never occur!
+
+ * Modified the Key_List data structure to record *both* the
+ minimum and the maximum key lengths. This information
+ is now computed in Key_List::Read_Keys, and thus
+ Key_List::Print_Min_Max doesn't need to bother.
+
+ * Modified the key position iterator scheme in options.cc to
+ eliminate the need for member function Options::Advance.
+ Now, the Options::Get function performs the advancement
+ automatically, obviating the need for an extra function call.
+
+ * Added the new function Options::Print_Options, to print out
+ the user-specified command line options to generated C
+ output file.
+
+ * Added a new function, Key_List::Print_Keylength_Table,
+ which creates a table of lengths for use in speeding
+ up the keyword search. This also meant that a new
+ option, -l (LENTABLE) is recognized. It controls
+ whether the length table is printed and the comparison
+ made in the generated function ``in_word_set.''
+
+ * Added a comment at the top of the generated C code
+ output file that tells what version of gperf was used.
+ Next, I'll also dump out the command line options
+ as a comment too. Thanks to Michael Tiemann for the
+ feedback on this.
+
+ * Fixed the -n option to make it work correctly with
+ other parts of the program (most notably the Perfect::Hash
+ function and the computation of minimum and maximum lengths).
+
+Fri Jan 13 21:25:27 1989 Doug Schmidt (schmidt at siam.ics.uci.edu)
+
+ * Realized the need to add a test that will enable
+ optimization of the generated C code in the ``hash'' function
+ by checking whether all the requested key positions are
+ guaranteed to exist due to the comparison in ``in_word_set.''
+ I'll put this in soon....
+
+Thu Jan 12 20:09:21 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Added pascal, modula3, and modula2 test inputs to the
+ Makefile
+
+ * Recognised that there is a bug with the -n option. However
+ I'm too busy to fix it properly, right now. The problem
+ is that the generated #defines end up being 0, since that's
+ my hack to make -n work. This needs complete rethinking!
+
+Tue Jan 10 00:08:16 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Added a new option, -n, that instructs gperf to not use the
+ length of an identifier when computing the hash functions.
+ I'm not sure how useful this is!
+
+ * Retransmitted the distribution to rocky.oswego.edu. Hopefully,
+ this will work!
+
+ * Began fixing the indentation and capitalization to conform
+ to the GNU coding guidelines.
+
+Mon Jan 9 22:23:18 1989 Doug Schmidt (schmidt at pompe.ics.uci.edu)
+
+ * Fixed horrible bug in Read_Line::Readln_Aux. This was
+ a subtle and pernicious off-by-1 error, that overwrote
+ past the last character of the input string buffer. I
+ think this fault was killing the vax!
+
+ * Yow, fixed an oversight in List_Node::List_Node, where the
+ pointer field Next was uninitialized. Luckily, the new routine
+ seems to return 0 filled objects the first time through!
+
+Sun Jan 8 13:43:14 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Modified the ``key linked'' diagnostic in Key_List::Read_Keys
+ to be more helpful and easy to read.
+
+ * Fixed the List_Node::List_Node so that it would ignore trailing
+ fields if the -t option was not enabled.
+
+ * Moved the List_Node declarations out of keylist.h and
+ into files of their own, called listnode.cc and listnode.h.
+ Made Set_Sort a member function of class List_Node.
+
+ * Massively updated the documentation in the gperf.texinfo file.
+
+ * Polished off the major revision to the print functions,
+ added a few new tests in the Makefile to check for the
+ validity of the program and ftp'ed the entire distribution
+ off to Doug Lea for libg++. ( changed it to
+ 1.3 to reflect the major changes with the generated
+ C code ).
+
+ * Fixed Key_List::Print_Switch to deal with the -p and -t options.
+ This meant that the ``still-born'' function Key_List::
+ Print_Type_Switch was superfluous, so I removed it.
+ Also, removed the restriction in Option that the -p and
+ -t options couldn't be used simultaneously.
+
+ * Modified List_Node::List_Node, to perform only 1 call to
+ ``new'' when dynamically allocating memory for the Key_Set
+ and the Uniq_Set.
+
+Sat Jan 7 14:10:51 1989 Doug Schmidt (schmidt at glacier.ics.uci.edu)
+
+ * Fixed a big bug with the new policy of nesting the
+ wordlist inside of generated function ``in_word_set.''
+ I'd forgotten to declare the wordlist array as static!
+ ( arrgh ).
+
+ * Added a new function Key_List::Set_Types, that figures out
+ the return type for generated function ``in_word_set,''
+ the user-defined ``struct tag,'' if one is used, and also
+ formats the array type for the static local array.
+
+ * Changed the print routines to take advantage of the
+ new -p option.
+
+ * Began adding the hooks to allow the return of a pointer
+ to a user defined struct location from the generated
+ ``in_word_set'' function instead of the current 0 or 1
+ return value. Created function Key_List::Print_Type_Switch
+ and added option -p to class Option, allowing the user to
+ request generation of the aforementioned pointers returned
+ instead of booleans.
+
+ * Put in checks in class Option to make sure that -S and -t
+ options are not used simultaneously. This restriction
+ will be removed in subsequent releases, once I decide on
+ a clean way to implement it.
+
+ * Sent version 1.2 to Doug Lea for possible inclusion into
+ the libg++ distribution.
+
+ * Moved the static word_list array inside the generated function
+ in_word_set. This supports better data hiding.
+
+ * Added a texinfo file, gperf.texinfo
+
+ * Revised the Makefile to cleanup the droppings from texinfo
+ and changed the name of gperf.cc and gperf.h to perfect.cc
+ and perfect.h.
+
+Fri Jan 6 13:04:45 1989 Doug Schmidt (schmidt at crimee.ics.uci.edu)
+
+ * Implemented the switch statement output format. Much better
+ for large datasets in terms of space used.
+
+ * Added new functions to break up the Key_List::Output function.
+ Functions added were Key_List::Print_Switch,
+ Key_List::Print_Min_Max, Key_List::Print_Keyword_Table,
+ Key_List::Print_Hash_Function, and
+ Key_List::Print_Lookup_Function. This simplifies the big mess
+ in Key_List::Output considerably!
+
+ * Added switch statement option to Options, which potentially
+ trades time for space in the generated lookup code.
+
+Thu Jan 5 22:46:34 1989 Doug Schmidt (schmidt at siam.ics.uci.edu)
+
+ * Released version 1.1
+
+ * Fixed a bug with Gperf::Merge_Set, it was skipping letters
+ shared between the Set_1 and Set_2.
+
+ * Added the optimal min/max algorithm in Key_List::Output. This
+ runs in O (3n/2), rather than O (2n) time.
+
+ * Changed Gperf::Sort_Set to use insertion sort, rather than
+ bubble sort.
+
+ * Added a check in Key_List::Output for the special case where
+ the keys used are 1,$. It is possible to generate more
+ efficient C code in this case.
diff --git a/apps/gperf/Makefile b/apps/gperf/Makefile
new file mode 100644
index 00000000000..f9a0d4b9bd1
--- /dev/null
+++ b/apps/gperf/Makefile
@@ -0,0 +1,25 @@
+#----------------------------------------------------------------------------
+# @(#)Makefile 1.1 10/18/96
+#
+# Makefile for the gperf application
+#----------------------------------------------------------------------------
+
+#----------------------------------------------------------------------------
+# Local macros
+#----------------------------------------------------------------------------
+
+INFO = README
+
+DIRS = src \
+ tests
+
+#----------------------------------------------------------------------------
+# Include macros and targets
+#----------------------------------------------------------------------------
+
+include $(WRAPPER_ROOT)/include/makeinclude/wrapper_macros.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/macros.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/rules.common.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/rules.nested.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/rules.nolocal.GNU
+
diff --git a/apps/gperf/README b/apps/gperf/README
new file mode 100644
index 00000000000..bd9d14ea680
--- /dev/null
+++ b/apps/gperf/README
@@ -0,0 +1,24 @@
+While teaching a data structures course at University of California,
+Irvine, I developed a program called GPERF that generates perfect hash
+functions for sets of key words. A perfect hash function is simply:
+
+ A hash function and a data structure that allows
+ recognition of a key word in a set of words using
+ exactly 1 probe into the data structure.
+
+The gperf.texinfo file explains how the program works, the form of the
+input, what options are available, and hints on choosing the best
+options for particular key word sets. The texinfo file is readable
+via the GNU emacs `info' command, and is also suitable for
+typesetting with TeX.
+
+The enclosed Makefile creates the executable program ``gperf'' and
+also runs some tests.
+
+Output from the GPERF program is used to recognize reserved words in
+the GNU C, GNU C++, and GNU Pascal compilers, as well as with the GNU
+indent program.
+
+Happy hacking!
+
+Douglas C. Schmidt
diff --git a/apps/gperf/gperf.1 b/apps/gperf/gperf.1
new file mode 100644
index 00000000000..5673c80062a
--- /dev/null
+++ b/apps/gperf/gperf.1
@@ -0,0 +1,23 @@
+.TH GPERF 1 "December 16, 1988"
+.UC 4
+.SH NAME
+gperf \- generate a perfect hash function from a key set
+.SH SYNOPSIS
+.B gperf
+[
+.B \-adghijklnoprsStv
+] [
+.I keyfile
+]
+.SH DESCRIPTION
+
+\fIgperf\fP reads a set of ``keys'' from \fIkeyfile\fP (or, by
+default, from the standard input) and attempts to find a non-minimal
+perfect hashing function that recognizes a member of the key set in
+constant, i.e., O(1), time. If such a function is found the program
+generates a pair of \fIC\fP source code routines that perform the
+hashing and table lookup. All generated code is directed to the
+standard output.
+
+Please refer to the \fIgperf.texinfo\fP file for more information.
+This file is distributed with the \fIgperf\fP release.
diff --git a/apps/gperf/gperf.info b/apps/gperf/gperf.info
new file mode 100644
index 00000000000..a0947230573
--- /dev/null
+++ b/apps/gperf/gperf.info
@@ -0,0 +1,1127 @@
+This is Info file gperf.info, produced by Makeinfo-1.55 from the input
+file ./gperf.texi.
+
+START-INFO-DIR-ENTRY
+* Gperf: (gperf). Perfect Hash Function Generator.
+END-INFO-DIR-ENTRY
+
+ This file documents the features of the GNU Perfect Hash Function
+Generator
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+
+ Permission is granted to make and distribute verbatim copies of this
+manual provided the copyright notice and this permission notice are
+preserved on all copies.
+
+ Permission is granted to copy and distribute modified versions of
+this manual under the conditions for verbatim copying, provided also
+that the section entitled "GNU General Public License" is included
+exactly as in the original, and provided that the entire resulting
+derived work is distributed under the terms of a permission notice
+identical to this one.
+
+ Permission is granted to copy and distribute translations of this
+manual into another language, under the above conditions for modified
+versions, except that the section entitled "GNU `gperf' General Public
+License" and this permission notice may be included in translations
+approved by the Free Software Foundation instead of in the original
+English.
+
+
+File: gperf.info, Node: Top, Next: Copying, Prev: (dir), Up: (dir)
+
+Introduction
+************
+
+ This manual documents the GNU `gperf' perfect hash function generator
+utility, focusing on its features and how to use them, and how to report
+bugs.
+
+* Menu:
+
+* Copying:: GNU `gperf' General Public License says
+ how you can copy and share `gperf'.
+* Contributors:: People who have contributed to `gperf'.
+* Motivation:: Static search structures and GNU GPERF.
+* Search Structures:: Static search structures and GNU `gperf'
+* Description:: High-level discussion of how GPERF functions.
+* Options:: A description of options to the program.
+* Bugs:: Known bugs and limitations with GPERF.
+* Projects:: Things still left to do.
+* Implementation:: Implementation Details for GNU GPERF.
+* Bibliography:: Material Referenced in this Report.
+
+ -- The Detailed Node Listing --
+
+High-Level Description of GNU `gperf'
+
+* Input Format:: Input Format to `gperf'
+* Output Format:: Output Format for Generated C Code with `gperf'
+
+Input Format to `gperf'
+
+* Declarations:: `struct' Declarations and C Code Inclusion.
+* Keywords:: Format for Keyword Entries.
+* Functions:: Including Additional C Functions.
+
+
+File: gperf.info, Node: Copying, Next: Contributors, Prev: Top, Up: Top
+
+GNU GENERAL PUBLIC LICENSE
+**************************
+
+ Version 1, February 1989
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ 675 Mass Ave, Cambridge, MA 02139, USA
+
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+Preamble
+========
+
+ The license agreements of most software companies try to keep users
+at the mercy of those companies. By contrast, our General Public
+License is intended to guarantee your freedom to share and change free
+software--to make sure the software is free for all its users. The
+General Public License applies to the Free Software Foundation's
+software and to any other program whose authors commit to using it.
+You can use it for your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Specifically, the General Public License is designed to make
+sure that you have the freedom to give away or sell copies of free
+software, that you receive source code or can get it if you want it,
+that you can change the software or use pieces of it in new free
+programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of a such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must tell them their rights.
+
+ We protect your rights with two steps: (1) copyright the software,
+and (2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+ TERMS AND CONDITIONS
+
+ 1. This License Agreement applies to any program or other work which
+ contains a notice placed by the copyright holder saying it may be
+ distributed under the terms of this General Public License. The
+ "Program", below, refers to any such program or work, and a "work
+ based on the Program" means either the Program or any work
+ containing the Program or a portion of it, either verbatim or with
+ modifications. Each licensee is addressed as "you".
+
+ 2. You may copy and distribute verbatim copies of the Program's source
+ code as you receive it, in any medium, provided that you
+ conspicuously and appropriately publish on each copy an
+ appropriate copyright notice and disclaimer of warranty; keep
+ intact all the notices that refer to this General Public License
+ and to the absence of any warranty; and give any other recipients
+ of the Program a copy of this General Public License along with
+ the Program. You may charge a fee for the physical act of
+ transferring a copy.
+
+ 3. You may modify your copy or copies of the Program or any portion of
+ it, and copy and distribute such modifications under the terms of
+ Paragraph 1 above, provided that you also do the following:
+
+ * cause the modified files to carry prominent notices stating
+ that you changed the files and the date of any change; and
+
+ * cause the whole of any work that you distribute or publish,
+ that in whole or in part contains the Program or any part
+ thereof, either with or without modifications, to be licensed
+ at no charge to all third parties under the terms of this
+ General Public License (except that you may choose to grant
+ warranty protection to some or all third parties, at your
+ option).
+
+ * If the modified program normally reads commands interactively
+ when run, you must cause it, when started running for such
+ interactive use in the simplest and most usual way, to print
+ or display an announcement including an appropriate copyright
+ notice and a notice that there is no warranty (or else,
+ saying that you provide a warranty) and that users may
+ redistribute the program under these conditions, and telling
+ the user how to view a copy of this General Public License.
+
+ * You may charge a fee for the physical act of transferring a
+ copy, and you may at your option offer warranty protection in
+ exchange for a fee.
+
+ Mere aggregation of another independent work with the Program (or
+ its derivative) on a volume of a storage or distribution medium
+ does not bring the other work under the scope of these terms.
+
+ 4. You may copy and distribute the Program (or a portion or
+ derivative of it, under Paragraph 2) in object code or executable
+ form under the terms of Paragraphs 1 and 2 above provided that you
+ also do one of the following:
+
+ * accompany it with the complete corresponding machine-readable
+ source code, which must be distributed under the terms of
+ Paragraphs 1 and 2 above; or,
+
+ * accompany it with a written offer, valid for at least three
+ years, to give any third party free (except for a nominal
+ charge for the cost of distribution) a complete
+ machine-readable copy of the corresponding source code, to be
+ distributed under the terms of Paragraphs 1 and 2 above; or,
+
+ * accompany it with the information you received as to where the
+ corresponding source code may be obtained. (This alternative
+ is allowed only for noncommercial distribution and only if you
+ received the program in object code or executable form alone.)
+
+ Source code for a work means the preferred form of the work for
+ making modifications to it. For an executable file, complete
+ source code means all the source code for all modules it contains;
+ but, as a special exception, it need not include source code for
+ modules which are standard libraries that accompany the operating
+ system on which the executable file runs, or for standard header
+ files or definitions files that accompany that operating system.
+
+ 5. You may not copy, modify, sublicense, distribute or transfer the
+ Program except as expressly provided under this General Public
+ License. Any attempt otherwise to copy, modify, sublicense,
+ distribute or transfer the Program is void, and will automatically
+ terminate your rights to use the Program under this License.
+ However, parties who have received copies, or rights to use
+ copies, from you under this General Public License will not have
+ their licenses terminated so long as such parties remain in full
+ compliance.
+
+ 6. By copying, distributing or modifying the Program (or any work
+ based on the Program) you indicate your acceptance of this license
+ to do so, and all its terms and conditions.
+
+ 7. Each time you redistribute the Program (or any work based on the
+ Program), the recipient automatically receives a license from the
+ original licensor to copy, distribute or modify the Program
+ subject to these terms and conditions. You may not impose any
+ further restrictions on the recipients' exercise of the rights
+ granted herein.
+
+ 8. The Free Software Foundation may publish revised and/or new
+ versions of the General Public License from time to time. Such
+ new versions will be similar in spirit to the present version, but
+ may differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+ Program specifies a version number of the license which applies to
+ it and "any later version", you have the option of following the
+ terms and conditions either of that version or of any later
+ version published by the Free Software Foundation. If the Program
+ does not specify a version number of the license, you may choose
+ any version ever published by the Free Software Foundation.
+
+ 9. If you wish to incorporate parts of the Program into other free
+ programs whose distribution conditions are different, write to the
+ author to ask for permission. For software which is copyrighted
+ by the Free Software Foundation, write to the Free Software
+ Foundation; we sometimes make exceptions for this. Our decision
+ will be guided by the two goals of preserving the free status of
+ all derivatives of our free software and of promoting the sharing
+ and reuse of software generally.
+
+ NO WARRANTY
+
+ 10. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO
+ WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE
+ LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
+ WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT
+ NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
+ FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE
+ QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+ PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY
+ SERVICING, REPAIR OR CORRECTION.
+
+ 11. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
+ WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY
+ MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE
+ LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL,
+ INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR
+ INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU
+ OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY
+ OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
+ ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+
+ END OF TERMS AND CONDITIONS
+
+Appendix: How to Apply These Terms to Your New Programs
+=======================================================
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to humanity, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+ To do so, attach the following notices to the program. It is safest
+to attach them to the start of each source file to most effectively
+convey the exclusion of warranty; and each file should have at least the
+"copyright" line and a pointer to where the full notice is found.
+
+ ONE LINE TO GIVE THE PROGRAM'S NAME AND A BRIEF IDEA OF WHAT IT DOES.
+ Copyright (C) 19YY NAME OF AUTHOR
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 1, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+
+ Also add information on how to contact you by electronic and paper
+mail.
+
+ If the program is interactive, make it output a short notice like
+this when it starts in an interactive mode:
+
+ Gnomovision version 69, Copyright (C) 19YY NAME OF AUTHOR
+ Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+ This is free software, and you are welcome to redistribute it
+ under certain conditions; type `show c' for details.
+
+ The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items--whatever suits your
+program.
+
+ You should also get your employer (if you work as a programmer) or
+your school, if any, to sign a "copyright disclaimer" for the program,
+if necessary. Here a sample; alter the names:
+
+ Yoyodyne, Inc., hereby disclaims all copyright interest in the
+ program `Gnomovision' (a program to direct compilers to make passes
+ at assemblers) written by James Hacker.
+
+ SIGNATURE OF TY COON, 1 April 1989
+ Ty Coon, President of Vice
+
+ That's all there is to it!
+
+
+File: gperf.info, Node: Contributors, Next: Motivation, Prev: Copying, Up: Top
+
+Contributors to GNU `gperf' Utility
+***********************************
+
+ * The GNU `gperf' perfect hash function generator utility was
+ originally written in GNU C++ by Douglas C. Schmidt. It is now
+ also available in a highly-portable "old-style" C version. The
+ general idea for the perfect hash function generator was inspired
+ by Keith Bostic's algorithm written in C, and distributed to
+ net.sources around 1984. The current program is a heavily
+ modified, enhanced, and extended implementation of Keith's basic
+ idea, created at the University of California, Irvine. Bugs,
+ patches, and suggestions should be reported to schmidt at
+ ics.uci.edu.
+
+ * Special thanks is extended to Michael Tiemann and Doug Lea, for
+ providing a useful compiler, and for giving me a forum to exhibit
+ my creation.
+
+ In addition, Adam de Boor and Nels Olson provided many tips and
+ insights that greatly helped improve the quality and functionality
+ of `gperf'.
+
+
+File: gperf.info, Node: Motivation, Next: Search Structures, Prev: Contributors, Up: Top
+
+Introduction
+************
+
+ `gperf' is a perfect hash function generator written in C++. It
+transforms an *n* element user-specified keyword set *W* into a perfect
+hash function *F*. *F* uniquely maps keywords in *W* onto the range
+0..*k*, where *k* >= *n*. If *k = n* then *F* is a *minimal* perfect
+hash function. `gperf' generates a 0..*k* element static lookup table
+and a pair of C functions. These functions determine whether a given
+character string *s* occurs in *W*, using at most one probe into the
+lookup table.
+
+ `gperf' currently generates the reserved keyword recognizer for
+lexical analyzers in several production and research compilers and
+language processing tools, including GNU C, GNU C++, GNU Pascal, GNU
+Modula 3, and GNU indent. Complete C++ source code for `gperf' is
+available via anonymous ftp from ics.uci.edu. `gperf' also is
+distributed along with the GNU libg++ library. A highly portable,
+functionally equivalent K&R C version of `gperf' is archived in
+comp.sources.unix, volume 20. Finally, a paper describing `gperf''s
+design and implementation in greater detail is available in the Second
+USENIX C++ Conference proceedings.
+
+
+File: gperf.info, Node: Search Structures, Next: Description, Prev: Motivation, Up: Top
+
+Static search structures and GNU `gperf'
+****************************************
+
+ A "static search structure" is an Abstract Data Type with certain
+fundamental operations, *e.g.*, *initialize*, *insert*, and *retrieve*.
+Conceptually, all insertions occur before any retrievals. In
+practice, `gperf' generates a `static' array containing search set
+keywords and any associated attributes specified by the user. Thus,
+there is essentially no execution-time cost for the insertions. It is
+a useful data structure for representing *static search sets*. Static
+search sets occur frequently in software system applications. Typical
+static search sets include compiler reserved words, assembler
+instruction opcodes, and built-in shell interpreter commands. Search
+set members, called "keywords", are inserted into the structure only
+once, usually during program initialization, and are not generally
+modified at run-time.
+
+ Numerous static search structure implementations exist, *e.g.*,
+arrays, linked lists, binary search trees, digital search tries, and
+hash tables. Different approaches offer trade-offs between space
+utilization and search time efficiency. For example, an *n* element
+sorted array is space efficient, though the average-case time
+complexity for retrieval operations using binary search is proportional
+to log *n*. Conversely, hash table implementations often locate a
+table entry in constant time, but typically impose additional memory
+overhead and exhibit poor worst case performance.
+
+ *Minimal perfect hash functions* provide an optimal solution for a
+particular class of static search sets. A minimal perfect hash
+function is defined by two properties:
+
+ * It allows keyword recognition in a static search set using at most
+ *one* probe into the hash table. This represents the "perfect"
+ property.
+
+ * The actual memory allocated to store the keywords is precisely
+ large enough for the keyword set, and *no larger*. This is the
+ "minimal" property.
+
+ For most applications it is far easier to generate *perfect* hash
+functions than *minimal perfect* hash functions. Moreover, non-minimal
+perfect hash functions frequently execute faster than minimal ones in
+practice. This phenomenon occurs since searching a sparse keyword table
+increases the probability of locating a "null" entry, thereby reducing
+string comparisons. `gperf''s default behavior generates
+*near-minimal* perfect hash functions for keyword sets. However,
+`gperf' provides many options that permit user control over the degree
+of minimality and perfection.
+
+ Static search sets often exhibit relative stability over time. For
+example, Ada's 63 reserved words have remained constant for nearly a
+decade. It is therefore frequently worthwhile to expend concerted
+effort building an optimal search structure *once*, if it subsequently
+receives heavy use multiple times. `gperf' removes the drudgery
+associated with constructing time- and space-efficient search
+structures by hand. It has proven a useful and practical tool for
+serious programming projects. Output from `gperf' is currently used in
+several production and research compilers, including GNU C, GNU C++,
+GNU Pascal, and GNU Modula 3. The latter two compilers are not yet
+part of the official GNU distribution. Each compiler utilizes `gperf'
+to automatically generate static search structures that efficiently
+identify their respective reserved keywords.
+
+
+File: gperf.info, Node: Description, Next: Options, Prev: Search Structures, Up: Top
+
+High-Level Description of GNU `gperf'
+*************************************
+
+* Menu:
+
+* Input Format:: Input Format to `gperf'
+* Output Format:: Output Format for Generated C Code with `gperf'
+
+ The perfect hash function generator `gperf' reads a set of
+"keywords" from a "keyfile" (or from the standard input by default).
+It attempts to derive a perfect hashing function that recognizes a
+member of the "static keyword set" with at most a single probe into the
+lookup table. If `gperf' succeeds in generating such a function it
+produces a pair of C source code routines that perform hashing and
+table lookup recognition. All generated C code is directed to the
+standard output. Command-line options described below allow you to
+modify the input and output format to `gperf'.
+
+ By default, `gperf' attempts to produce time-efficient code, with
+less emphasis on efficient space utilization. However, several options
+exist that permit trading-off execution time for storage space and vice
+versa. In particular, expanding the generated table size produces a
+sparse search structure, generally yielding faster searches.
+Conversely, you can direct `gperf' to utilize a C `switch' statement
+scheme that minimizes data space storage size. Furthermore, using a C
+`switch' may actually speed up the keyword retrieval time somewhat.
+Actual results depend on your C compiler, of course.
+
+ In general, `gperf' assigns values to the characters it is using for
+hashing until some set of values gives each keyword a unique value. A
+helpful heuristic is that the larger the hash value range, the easier
+it is for `gperf' to find and generate a perfect hash function.
+Experimentation is the key to getting the most from `gperf'.
+
+
+File: gperf.info, Node: Input Format, Next: Output Format, Prev: Description, Up: Description
+
+Input Format to `gperf'
+=======================
+
+ You can control the input keyfile format by varying certain
+command-line arguments, in particular the `-t' option. The input's
+appearance is similar to GNU utilities `flex' and `bison' (or UNIX
+utilities `lex' and `yacc'). Here's an outline of the general format:
+
+ declarations
+ %%
+ keywords
+ %%
+ functions
+
+ *Unlike* `flex' or `bison', all sections of `gperf''s input are
+optional. The following sections describe the input format for each
+section.
+
+* Menu:
+
+* Declarations:: `struct' Declarations and C Code Inclusion.
+* Keywords:: Format for Keyword Entries.
+* Functions:: Including Additional C Functions.
+
+
+File: gperf.info, Node: Declarations, Next: Keywords, Prev: Input Format, Up: Input Format
+
+`struct' Declarations and C Code Inclusion
+------------------------------------------
+
+ The keyword input file optionally contains a section for including
+arbitrary C declarations and definitions, as well as provisions for
+providing a user-supplied `struct'. If the `-t' option *is* enabled,
+you *must* provide a C `struct' as the last component in the
+declaration section from the keyfile. The first field in this
+struct must be a `char *' identifier called "name," although it is
+possible to modify this field's name with the `-K' option described
+below.
+
+ Here is a simple example, using months of the year and their
+attributes as input:
+
+ struct months { char *name; int number; int days; int leap_days; };
+ %%
+ january, 1, 31, 31
+ february, 2, 28, 29
+ march, 3, 31, 31
+ april, 4, 30, 30
+ may, 5, 31, 31
+ june, 6, 30, 30
+ july, 7, 31, 31
+ august, 8, 31, 31
+ september, 9, 30, 30
+ october, 10, 31, 31
+ november, 11, 30, 30
+ december, 12, 31, 31
+
+ Separating the `struct' declaration from the list of key words and
+other fields are a pair of consecutive percent signs, `%%', appearing
+left justified in the first column, as in the UNIX utility `lex'.
+
+ Using a syntax similar to GNU utilities `flex' and `bison', it is
+possible to directly include C source text and comments verbatim into
+the generated output file. This is accomplished by enclosing the region
+inside left-justified surrounding `%{', `%}' pairs. Here is an input
+fragment based on the previous example that illustrates this feature:
+
+ %{
+ #include <assert.h>
+ /* This section of code is inserted directly into the output. */
+ int return_month_days (struct months *months, int is_leap_year);
+ %}
+ struct months { char *name; int number; int days; int leap_days; };
+ %%
+ january, 1, 31, 31
+ february, 2, 28, 29
+ march, 3, 31, 31
+ ...
+
+ It is possible to omit the declaration section entirely. In this
+case the keyfile begins directly with the first keyword line, *e.g.*:
+
+ january, 1, 31, 31
+ february, 2, 28, 29
+ march, 3, 31, 31
+ april, 4, 30, 30
+ ...
+
+
+File: gperf.info, Node: Keywords, Next: Functions, Prev: Declarations, Up: Input Format
+
+Format for Keyword Entries
+--------------------------
+
+ The second keyfile format section contains lines of keywords and any
+associated attributes you might supply. A line beginning with `#' in
+the first column is considered a comment. Everything following the `#'
+is ignored, up to and including the following newline.
+
+ The first field of each non-comment line is always the key itself.
+It should be given as a simple name, *i.e.*, without surrounding string
+quotation marks, and be left-justified flush against the first column.
+In this context, a "field" is considered to extend up to, but not
+include, the first blank, comma, or newline. Here is a simple example
+taken from a partial list of C reserved words:
+
+ # These are a few C reserved words, see the c.gperf file
+ # for a complete list of ANSI C reserved words.
+ unsigned
+ sizeof
+ switch
+ signed
+ if
+ default
+ for
+ while
+ return
+
+ Note that unlike `flex' or `bison' the first `%%' marker may be
+elided if the declaration section is empty.
+
+ Additional fields may optionally follow the leading keyword. Fields
+should be separated by commas, and terminate at the end of line. What
+these fields mean is entirely up to you; they are used to initialize the
+elements of the user-defined `struct' provided by you in the
+declaration section. If the `-t' option is *not* enabled these fields
+are simply ignored. All previous examples except the last one contain
+keyword attributes.
+
+
+File: gperf.info, Node: Functions, Prev: Keywords, Up: Input Format
+
+Including Additional C Functions
+--------------------------------
+
+ The optional third section also corresponds closely with conventions
+found in `flex' and `bison'. All text in this section, starting at the
+final `%%' and extending to the end of the input file, is included
+verbatim into the generated output file. Naturally, it is your
+responsibility to ensure that the code contained in this section is
+valid C.
+
+
+File: gperf.info, Node: Output Format, Prev: Input Format, Up: Description
+
+Output Format for Generated C Code with `gperf'
+===============================================
+
+ Several options control how the generated C code appears on the
+standard output. Two C functions are generated. They are called `hash'
+and `in_word_set', although you may modify the name for `in_word_set'
+with a command-line option. Both functions require two arguments, a
+string, `char *' STR, and a length parameter, `int' LEN. Their default
+function prototypes are as follows:
+
+ static int hash (char *str, int len);
+ int in_word_set (char *str, int len);
+
+ By default, the generated `hash' function returns an integer value
+created by adding LEN to several user-specified STR key positions
+indexed into an "associated values" table stored in a local static
+array. The associated values table is constructed internally by
+`gperf' and later output as a static local C array called HASH_TABLE;
+its meaning and properties are described below. *Note
+Implementation::. The relevant key positions are specified via the `-k'
+option when running `gperf', as detailed in the *Options* section
+below. *Note Options::.
+
+ Two options, `-g' (assume you are compiling with GNU C and its
+`inline' feature) and `-a' (assume ANSI C-style function prototypes),
+alter the content of both the generated `hash' and `in_word_set'
+routines. However, function `in_word_set' may be modified more
+extensively, in response to your option settings. The options that
+affect the `in_word_set' structure are:
+
+ `-p'
+ Have function `in_word_set' return a pointer rather than a
+ boolean.
+
+ `-t'
+ Make use of the user-defined `struct'.
+
+ `-S TOTAL SWITCH STATEMENTS'
+ Generate 1 or more C `switch' statements rather than use a
+ large, (and potentially sparse) static array. Although the
+ exact time and space savings of this approach vary according
+ to your C compiler's degree of optimization, this method
+ often results in smaller and faster code.
+
+ If the `-t', `-S', and `-p' options are omitted the default action
+is to generate a `char *' array containing the keys, together with
+additional null strings used for padding the array. By experimenting
+with the various input and output options, and timing the resulting C
+code, you can determine the best option choices for different keyword
+set characteristics.
+
+
+File: gperf.info, Node: Options, Next: Bugs, Prev: Description, Up: Top
+
+Options to the `gperf' Utility
+******************************
+
+ There are *many* options to `gperf'. They were added to make the
+program more convenient for use with real applications. "On-line" help
+is readily available via the `-h' option. Other options include:
+
+ `-a'
+ Generate ANSI Standard C code using function prototypes. The
+ default is to use "classic" K&R C function declaration syntax.
+
+ `-c'
+ Generates C code that uses the `strncmp' function to perform
+ string comparisons. The default action is to use `strcmp'.
+
+ `-C'
+ Makes the contents of all generated lookup tables constant,
+ *i.e.*, "readonly." Many compilers can generate more
+ efficient code for this by putting the tables in readonly
+ memory.
+
+ `-d'
+ Enables the debugging option. This produces verbose
+ diagnostics to "standard error" when `gperf' is executing.
+ It is useful both for maintaining the program and for
+ determining whether a given set of options is actually
+ speeding up the search for a solution. Some useful
+ information is dumped at the end of the program when the `-d'
+ option is enabled.
+
+ `-D'
+ Handle keywords whose key position sets hash to duplicate
+ values. Duplicate hash values occur for two reasons:
+
+ * Since `gperf' does not backtrack it is possible for it
+ to process all your input keywords without finding a
+ unique mapping for each word. However, frequently only
+ a very small number of duplicates occur, and the
+ majority of keys still require one probe into the table.
+
+ * Sometimes a set of keys may have the same names, but
+ possess different attributes. With the -D option
+ `gperf' treats all these keys as part of an equivalence
+ class and generates a perfect hash function with multiple
+ comparisons for duplicate keys. It is up to you to
+ completely disambiguate the keywords by modifying the
+ generated C code. However, `gperf' helps you out by
+ organizing the output.
+
+ Option `-D' is extremely useful for certain large or highly
+ redundant keyword sets, *e.g.*, assembler instruction opcodes.
+ Using this option usually means that the generated hash
+ function is no longer perfect. On the other hand, it permits
+ `gperf' to work on keyword sets that it otherwise could not
+ handle.
+
+ `-e KEYWORD DELIMITER LIST'
+ Allows the user to provide a string containing delimiters
+ used to separate keywords from their attributes. The default
+ is ",\n". This option is essential if you want to use
+ keywords that have embedded commas or newlines. One useful
+ trick is to use -e'TAB', where TAB is the literal tab
+ character.
+
+ `-E'
+ Define constant values using an enum local to the lookup
+ function rather than with #defines. This also means that
+ different lookup functions can reside in the same file.
+ Thanks to James Clark (jjc at ai.mit.edu).
+
+ `-f ITERATION AMOUNT'
+ Generate the perfect hash function "fast." This decreases
+ `gperf''s running time at the cost of a less minimal generated
+ table size. The iteration amount represents the number of
+ times to iterate when resolving a collision. `0' means
+ `iterate by the number of keywords'. This option is probably
+ most useful when used in conjunction with options `-D' and/or
+ `-S' for *large* keyword sets.
+
+ `-g'
+ Assume a GNU compiler, *e.g.*, `g++' or `gcc'. This makes
+ all generated routines use the "inline" keyword to remove the
+ cost of function calls. Note that `-g' does *not* imply
+ `-a', since other non-ANSI C compilers may have provisions
+ for a function `inline' feature.
+
+ `-G'
+ Generate the static table of keywords as a static global
+ variable, rather than hiding it inside of the lookup function
+ (which is the default behavior).
+
+ `-h'
+ Prints a short summary on the meaning of each program option.
+ Aborts further program execution.
+
+ `-H HASH FUNCTION NAME'
+ Allows you to specify the name for the generated hash
+ function. Default name is `hash.' This option permits the
+ use of two hash tables in the same file.
+
+ `-i INITIAL VALUE'
+ Provides an initial VALUE for the associate values array.
+ Default is 0. Increasing the initial value helps inflate the
+ final table size, possibly leading to more time efficient
+ keyword lookups. Note that this option is not particularly
+ useful when `-S' is used. Also, `-i' is overridden when the
+ `-r' option is used.
+
+ `-j JUMP VALUE'
+ Affects the "jump value," *i.e.*, how far to advance the
+ associated character value upon collisions. JUMP VALUE is
+ rounded up to an odd number, the default is 5. If the JUMP
+ VALUE is 0 `gperf' jumps by random amounts.
+
+ `-k KEYS'
+ Allows selection of the character key positions used in the
+ keywords' hash function. The allowable choices range between
+ 1-126, inclusive. The positions are separated by commas,
+ *e.g.*, `-k 9,4,13,14'; ranges may be used, *e.g.*, `-k 2-7';
+ and positions may occur in any order. Furthermore, the
+ meta-character '*' causes the generated hash function to
+ consider *all* character positions in each key, whereas '$'
+ instructs the hash function to use the "final character" of a
+ key (this is the only way to use a character position greater
+ than 126, incidentally).
+
+ For instance, the option `-k 1,2,4,6-10,'$'' generates a hash
+ function that considers positions 1,2,4,6,7,8,9,10, plus the
+ last character in each key (which may differ for each key,
+ obviously). Keys with length less than the indicated key
+ positions work properly, since selected key positions
+ exceeding the key length are simply not referenced in the
+ hash function.
+
+ `-K KEY NAME'
+ By default, the program assumes the structure component
+ identifier for the keyword is "name." This option allows an
+ arbitrary choice of identifier for this component, although
+ it still must occur as the first field in your supplied
+ `struct'.
+
+ `-l'
+ Compare key lengths before trying a string comparison. This
+ might cut down on the number of string comparisons made
+ during the lookup, since keys with different lengths are
+ never compared via `strcmp'. However, using `-l' might
+ greatly increase the size of the generated C code if the
+ lookup table range is large (which implies that the switch
+ option `-S' is not enabled), since the length table contains
+ as many elements as there are entries in the lookup table.
+
+ `-L GENERATED LANGUAGE NAME'
+ Instructs `gperf' to generate code in the language specified
+ by the option's argument. Languages handled are currently
+ C++ and C. The default is C.
+
+ `-n'
+ Instructs the generator not to include the length of a
+ keyword when computing its hash value. This may save a few
+ assembly instructions in the generated lookup table.
+
+ `-N LOOKUP FUNCTION NAME'
+ Allows you to specify the name for the generated lookup
+ function. Default name is `in_word_set.' This option
+ permits completely automatic generation of perfect hash
+ functions, especially when multiple generated hash functions
+ are used in the same application.
+
+ `-o'
+ Reorders the keywords by sorting the keywords so that
+ frequently occurring key position set components appear first.
+ A second reordering pass follows so that keys with "already
+ determined values" are placed towards the front of the
+ keylist. This may decrease the time required to generate a
+ perfect hash function for many keyword sets, and also produce
+ more minimal perfect hash functions. The reason for this is
+ that the reordering helps prune the search time by handling
+ inevitable collisions early in the search process. On the
+ other hand, if the number of keywords is *very* large using
+ `-o' may *increase* `gperf''s execution time, since
+ collisions will begin earlier and continue throughout the
+ remainder of keyword processing. See Cichelli's paper from
+ the January 1980 Communications of the ACM for details.
+
+ `-p'
+ Changes the return value of the generated function
+ `in_word_set' from boolean (*i.e.*, 0 or 1), to either type
+ "pointer to user-defined struct," (if the `-t' option is
+ enabled), or simply to `char *', if `-t' is not enabled.
+ This option is most useful when the `-t' option (allowing
+ user-defined structs) is used. For example, it is possible
+ to automatically generate the GNU C reserved word lookup
+ routine with the options `-p' and `-t'.
+
+ `-r'
+ Utilizes randomness to initialize the associated values
+ table. This frequently generates solutions faster than using
+ deterministic initialization (which starts all associated
+ values at 0). Furthermore, using the randomization option
+ generally increases the size of the table. If `gperf' has
+ difficulty with a certain keyword set try using `-r' or `-D'.
+
+ `-s SIZE-MULTIPLE'
+ Affects the size of the generated hash table. The numeric
+ argument for this option indicates "how many times larger or
+ smaller" the maximum associated value range should be, in
+ relationship to the number of keys. If the SIZE-MULTIPLE is
+ negative the maximum associated value is calculated by
+ *dividing* it into the total number of keys. For example, a
+ value of 3 means "allow the maximum associated value to be
+ about 3 times larger than the number of input keys."
+
+ Conversely, a value of -3 means "allow the maximum associated
+ value to be about 3 times smaller than the number of input
+ keys." Negative values are useful for limiting the overall
+ size of the generated hash table, though this usually
+ increases the number of duplicate hash values.
+
+ If `generate switch' option `-S' is *not* enabled, the maximum
+ associated value influences the static array table size, and
+ a larger table should decrease the time required for an
+ unsuccessful search, at the expense of extra table space.
+
+ The default value is 1, thus the default maximum associated
+ value is about the same size as the number of keys (for
+ efficiency, the maximum associated value is always rounded up
+ to a power of 2). The actual table size may vary somewhat,
+ since this technique is essentially a heuristic. In
+ particular, setting this value too high slows down `gperf''s
+ runtime, since it must search through a much larger range of
+ values. Judicious use of the `-f' option helps alleviate this
+ overhead, however.
+
+ `-S TOTAL SWITCH STATEMENTS'
+ Causes the generated C code to use a `switch' statement
+ scheme, rather than an array lookup table. This can lead to
+ a reduction in both time and space requirements for some
+ keyfiles. The argument to this option determines how many
+ `switch' statements are generated. A value of 1 generates 1
+ `switch' containing all the elements, a value of 2 generates
+ 2 tables with 1/2 the elements in each `switch', etc. This
+ is useful since many C compilers cannot correctly generate
+ code for large `switch' statements. This option was inspired
+ in part by Keith Bostic's original C program.
+
+ `-t'
+ Allows you to include a `struct' type declaration for
+ generated code. Any text before a pair of consecutive %% is
+ considered part of the type declaration. Key words and
+ additional fields may follow this, one group of fields per
+ line. A set of examples for generating perfect hash tables
+ and functions for Ada, C, G++, Pascal, and Modula 2 and 3
+ reserved words are distributed with this release.
+
+ `-T'
+ Prevents the transfer of the type declaration to the output
+ file. Use this option if the type is already defined
+ elsewhere.
+
+ `-v'
+ Prints out the current version number.
+
+ `-Z CLASS NAME'
+ Allow user to specify name of generated C++ class. Default
+ name is `Perfect_Hash'.
+
+
+File: gperf.info, Node: Bugs, Next: Projects, Prev: Options, Up: Top
+
+Known Bugs and Limitations with `gperf'
+***************************************
+
+ The following are some limitations with the current release of
+`gperf':
+
+ * The `gperf' utility is tuned to execute quickly, and works quickly
+ for small to medium size data sets (around 1000 keywords). It is
+ extremely useful for maintaining perfect hash functions for
+ compiler keyword sets. Several recent enhancements now enable
+ `gperf' to work efficiently on much larger keyword sets (over
+ 15,000 keywords). When processing large keyword sets it helps
+ greatly to have over 8 megs of RAM.
+
+ However, since `gperf' does not backtrack no guaranteed solution
+ occurs on every run. On the other hand, it is usually easy to
+ obtain a solution by varying the option parameters. In
+ particular, try the `-r' option, and also try changing the default
+ arguments to the `-s' and `-j' options. To *guarantee* a
+ solution, use the `-D' and `-S' options, although the final
+ results are not likely to be a *perfect* hash function anymore!
+ Finally, use the `-f' option if you want `gperf' to generate the
+ perfect hash function *fast*, with less emphasis on making it
+ minimal.
+
+ * The size of the generated static keyword array can get *extremely*
+ large if the input keyword file is large or if the keywords are
+ quite similar. This tends to slow down the compilation of the
+ generated C code, and *greatly* inflates the object code size. If
+ this situation occurs, consider using the `-S' option to reduce
+ data size, potentially increasing keyword recognition time a
+ negligible amount. Since many C compilers cannot correctly
+ generate code for large switch statements it is important to
+ qualify the -S option with an appropriate numerical argument that
+ controls the number of switch statements generated.
+
+ * The maximum number of key positions selected for a given key has an
+ arbitrary limit of 126. This restriction should be removed, and if
+ anyone considers this a problem write me and let me know so I can
+ remove the constraint.
+
+ * The C++ source code only compiles correctly with GNU G++, version
+ 1.36 (and hopefully later versions). Porting to AT&T cfront would
+ be tedious, but possible (and desirable). There is also a K&R C
+ version available now. This should compile without change on most
+ BSD systems, but may require a bit of work to run on SYSV, since
+ `gperf' uses ALLOCA in several places. Send mail to schmidt at
+ ics.uci.edu for information.
+
+
+File: gperf.info, Node: Projects, Next: Implementation, Prev: Bugs, Up: Top
+
+Things Still Left to Do
+***********************
+
+ It should be "relatively" easy to replace the current perfect hash
+function algorithm with a more exhaustive approach; the perfect hash
+module is essentially independent from other program modules. Additional
+worthwhile improvements include:
+
+ * Make the algorithm more robust. At present, the program halts
+ with an error diagnostic if it can't find a direct solution and
+ the `-D' option is not enabled. A more comprehensive, albeit
+ computationally expensive, approach would employ backtracking or
+ enable alternative options and retry. It's not clear how helpful
+ this would be, in general, since most search sets are rather small
+ in practice.
+
+ * Another useful extension involves modifying the program to generate
+ "minimal" perfect hash functions (under certain circumstances, the
+ current version can be rather extravagant in the generated table
+ size). Again, this is mostly of theoretical interest, since a
+ sparse table often produces faster lookups, and use of the `-S'
+ `switch' option can minimize the data size, at the expense of
+ slightly longer lookups (note that the gcc compiler generally
+ produces good code for `switch' statements, reducing the need for
+ more complex schemes).
+
+ * In addition to improving the algorithm, it would also be useful to
+ generate a C++ class or Ada package as the code output, in
+ addition to the current C routines.
+
+
+File: gperf.info, Node: Implementation, Next: Bibliography, Prev: Projects, Up: Top
+
+Implementation Details of GNU `gperf'
+*************************************
+
+ A paper giving a high-level description of the data structures
+and algorithms used to implement `gperf' will soon be available. This
+paper is useful not only from a maintenance and enhancement perspective,
+but also because it demonstrates several clever and useful programming
+techniques, *e.g.*, `Iteration Number' boolean arrays, double hashing,
+a "safe" and efficient method for reading arbitrarily long input from a
+file, and a provably optimal algorithm for simultaneously determining
+both the minimum and maximum elements in a list.
+
+
+File: gperf.info, Node: Bibliography, Prev: Implementation, Up: Top
+
+Bibliography
+************
+
+ [1] Chang, C.C.: A Scheme for Constructing Ordered Minimal Perfect
+Hashing Functions Information Sciences 39(1986), 187-195.
+
+ [2] Cichelli, Richard J. Author's Response to "On Cichelli's Minimal
+Perfect Hash Functions Method" Communications of the ACM, 23,
+12(December 1980), 729.
+
+ [3] Cichelli, Richard J. Minimal Perfect Hash Functions Made Simple
+Communications of the ACM, 23, 1(January 1980), 17-19.
+
+ [4] Cook, C. R. and Oldehoeft, R.R. A Letter Oriented Minimal
+Perfect Hashing Function SIGPLAN Notices, 17, 9(September 1982), 18-27.
+
+ [5] Cormack, G. V. and Horspool, R. N. S. and Kaiserwerth, M.
+Practical Perfect Hashing Computer Journal, 28, 1(January 1985), 54-58.
+
+ [6] Jaeschke, G. Reciprocal Hashing: A Method for Generating Minimal
+Perfect Hashing Functions Communications of the ACM, 24, 12(December
+1981), 829-833.
+
+ [7] Jaeschke, G. and Osterburg, G. On Cichelli's Minimal Perfect
+Hash Functions Method Communications of the ACM, 23, 12(December 1980),
+728-729.
+
+ [8] Sager, Thomas J. A Polynomial Time Generator for Minimal Perfect
+Hash Functions Communications of the ACM, 28, 5(December 1985), 523-532.
+
+ [9] Schmidt, Douglas C. GPERF: A Perfect Hash Function Generator
+Second USENIX C++ Conference Proceedings, April 1990.
+
+ [10] Sebesta, R.W. and Taylor, M.A. Minimal Perfect Hash Functions
+for Reserved Word Lists SIGPLAN Notices, 20, 12(September 1985), 47-53.
+
+ [11] Sprugnoli, R. Perfect Hashing Functions: A Single Probe
+Retrieving Method for Static Sets Communications of the ACM, 20,
+11(November 1977), 841-850.
+
+ [12] Stallman, Richard M. Using and Porting GNU CC Free Software
+Foundation, 1988.
+
+ [13] Stroustrup, Bjarne The C++ Programming Language.
+Addison-Wesley, 1986.
+
+ [14] Tiemann, Michael D. User's Guide to GNU C++ Free Software
+Foundation, 1989.
+
+
+
+Tag Table:
+Node: Top1218
+Node: Copying2456
+Node: Contributors15759
+Node: Motivation16859
+Node: Search Structures18126
+Node: Description21679
+Node: Input Format23499
+Node: Declarations24294
+Node: Keywords26601
+Node: Functions28192
+Node: Output Format28686
+Node: Options31156
+Node: Bugs44526
+Node: Projects47213
+Node: Implementation48790
+Node: Bibliography49509
+
+End Tag Table
diff --git a/apps/gperf/gperf.texi b/apps/gperf/gperf.texi
new file mode 100644
index 00000000000..649d05f7ec6
--- /dev/null
+++ b/apps/gperf/gperf.texi
@@ -0,0 +1,1184 @@
+\input texinfo @c -*-texinfo-*-
+
+@settitle User's Guide to @code{gperf}
+@setfilename gperf.info
+
+@ifinfo
+@format
+START-INFO-DIR-ENTRY
+* Gperf: (gperf). Perfect Hash Function Generator.
+END-INFO-DIR-ENTRY
+@end format
+@end ifinfo
+
+@ifinfo
+This file documents the features of the GNU Perfect Hash Function Generator
+
+Copyright (C) 1989 Free Software Foundation, Inc.
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+@ignore
+Permission is granted to process this file through @TeX{} and print the
+results, provided the printed document carries copying permission
+notice identical to this one except for the removal of this paragraph
+(this paragraph not being relevant to the printed manual).
+
+@end ignore
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided also that the
+section entitled ``GNU General Public License'' is included exactly as
+in the original, and provided that the entire resulting derived work is
+distributed under the terms of a permission notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that the section entitled ``GNU @code{gperf} General Public
+License'' and
+this permission notice may be included in translations approved by the
+Free Software Foundation instead of in the original English.
+@end ifinfo
+
+@setchapternewpage odd
+
+@titlepage
+@center @titlefont{User's Guide}
+@sp 2
+@center @titlefont{for the}
+@sp 2
+@center @titlefont{GNU GPERF Utility}
+@sp 4
+@center Douglas C. Schmidt
+@sp 3
+@center last updated 1 November 1989
+@sp 1
+@center for version 2.0
+@page
+@vskip 0pt plus 1filll
+Copyright @copyright{} 1989 Free Software Foundation, Inc.
+
+
+Permission is granted to make and distribute verbatim copies of
+this manual provided the copyright notice and this permission notice
+are preserved on all copies.
+
+Permission is granted to copy and distribute modified versions of this
+manual under the conditions for verbatim copying, provided also that the
+section entitled ``GNU @code{gperf} General Public License'' is included
+exactly as
+in the original, and provided that the entire resulting derived work is
+distributed under the terms of a permission notice identical to this one.
+
+Permission is granted to copy and distribute translations of this manual
+into another language, under the above conditions for modified versions,
+except that the section entitled ``GNU @code{gperf} General Public
+License'' may be
+included in a translation approved by the author instead of in the original
+English.
+@end titlepage
+
+@ifinfo
+@node Top, Copying, (dir), (dir)
+@ichapter Introduction
+
+This manual documents the GNU @code{gperf} perfect hash function generator
+utility, focusing on its features and how to use them, and how to report
+bugs.
+
+@end ifinfo
+@menu
+* Copying:: GNU @code{gperf} General Public License says
+ how you can copy and share @code{gperf}.
+* Contributors:: People who have contributed to @code{gperf}.
+* Motivation:: Static search structures and GNU GPERF.
+* Search Structures:: Static search structures and GNU @code{gperf}
+* Description:: High-level discussion of how GPERF functions.
+* Options:: A description of options to the program.
+* Bugs:: Known bugs and limitations with GPERF.
+* Projects:: Things still left to do.
+* Implementation:: Implementation Details for GNU GPERF.
+* Bibliography:: Material Referenced in this Report.
+
+ --- The Detailed Node Listing ---
+
+High-Level Description of GNU @code{gperf}
+
+* Input Format:: Input Format to @code{gperf}
+* Output Format:: Output Format for Generated C Code with @code{gperf}
+
+Input Format to @code{gperf}
+
+* Declarations:: @code{struct} Declarations and C Code Inclusion.
+* Keywords:: Format for Keyword Entries.
+* Functions:: Including Additional C Functions.
+@end menu
+
+@node Copying, Contributors, Top, Top
+@unnumbered GNU GENERAL PUBLIC LICENSE
+@center Version 1, February 1989
+
+@display
+Copyright @copyright{} 1989 Free Software Foundation, Inc.
+675 Mass Ave, Cambridge, MA 02139, USA
+
+Everyone is permitted to copy and distribute verbatim copies
+of this license document, but changing it is not allowed.
+@end display
+
+@unnumberedsec Preamble
+
+ The license agreements of most software companies try to keep users
+at the mercy of those companies. By contrast, our General Public
+License is intended to guarantee your freedom to share and change free
+software---to make sure the software is free for all its users. The
+General Public License applies to the Free Software Foundation's
+software and to any other program whose authors commit to using it.
+You can use it for your programs, too.
+
+ When we speak of free software, we are referring to freedom, not
+price. Specifically, the General Public License is designed to make
+sure that you have the freedom to give away or sell copies of free
+software, that you receive source code or can get it if you want it,
+that you can change the software or use pieces of it in new free
+programs; and that you know you can do these things.
+
+ To protect your rights, we need to make restrictions that forbid
+anyone to deny you these rights or to ask you to surrender the rights.
+These restrictions translate to certain responsibilities for you if you
+distribute copies of the software, or if you modify it.
+
+ For example, if you distribute copies of a such a program, whether
+gratis or for a fee, you must give the recipients all the rights that
+you have. You must make sure that they, too, receive or can get the
+source code. And you must tell them their rights.
+
+ We protect your rights with two steps: (1) copyright the software, and
+(2) offer you this license which gives you legal permission to copy,
+distribute and/or modify the software.
+
+ Also, for each author's protection and ours, we want to make certain
+that everyone understands that there is no warranty for this free
+software. If the software is modified by someone else and passed on, we
+want its recipients to know that what they have is not the original, so
+that any problems introduced by others will not reflect on the original
+authors' reputations.
+
+ The precise terms and conditions for copying, distribution and
+modification follow.
+
+@iftex
+@unnumberedsec TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center TERMS AND CONDITIONS
+@end ifinfo
+
+@enumerate
+@item
+This License Agreement applies to any program or other work which
+contains a notice placed by the copyright holder saying it may be
+distributed under the terms of this General Public License. The
+``Program'', below, refers to any such program or work, and a ``work based
+on the Program'' means either the Program or any work containing the
+Program or a portion of it, either verbatim or with modifications. Each
+licensee is addressed as ``you''.
+
+@item
+You may copy and distribute verbatim copies of the Program's source
+code as you receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice and
+disclaimer of warranty; keep intact all the notices that refer to this
+General Public License and to the absence of any warranty; and give any
+other recipients of the Program a copy of this General Public License
+along with the Program. You may charge a fee for the physical act of
+transferring a copy.
+
+@item
+You may modify your copy or copies of the Program or any portion of
+it, and copy and distribute such modifications under the terms of Paragraph
+1 above, provided that you also do the following:
+
+@itemize @bullet
+@item
+cause the modified files to carry prominent notices stating that
+you changed the files and the date of any change; and
+
+@item
+cause the whole of any work that you distribute or publish, that
+in whole or in part contains the Program or any part thereof, either
+with or without modifications, to be licensed at no charge to all
+third parties under the terms of this General Public License (except
+that you may choose to grant warranty protection to some or all
+third parties, at your option).
+
+@item
+If the modified program normally reads commands interactively when
+run, you must cause it, when started running for such interactive use
+in the simplest and most usual way, to print or display an
+announcement including an appropriate copyright notice and a notice
+that there is no warranty (or else, saying that you provide a
+warranty) and that users may redistribute the program under these
+conditions, and telling the user how to view a copy of this General
+Public License.
+
+@item
+You may charge a fee for the physical act of transferring a
+copy, and you may at your option offer warranty protection in
+exchange for a fee.
+@end itemize
+
+Mere aggregation of another independent work with the Program (or its
+derivative) on a volume of a storage or distribution medium does not bring
+the other work under the scope of these terms.
+
+@item
+You may copy and distribute the Program (or a portion or derivative of
+it, under Paragraph 2) in object code or executable form under the terms of
+Paragraphs 1 and 2 above provided that you also do one of the following:
+
+@itemize @bullet
+@item
+accompany it with the complete corresponding machine-readable
+source code, which must be distributed under the terms of
+Paragraphs 1 and 2 above; or,
+
+@item
+accompany it with a written offer, valid for at least three
+years, to give any third party free (except for a nominal charge
+for the cost of distribution) a complete machine-readable copy of the
+corresponding source code, to be distributed under the terms of
+Paragraphs 1 and 2 above; or,
+
+@item
+accompany it with the information you received as to where the
+corresponding source code may be obtained. (This alternative is
+allowed only for noncommercial distribution and only if you
+received the program in object code or executable form alone.)
+@end itemize
+
+Source code for a work means the preferred form of the work for making
+modifications to it. For an executable file, complete source code means
+all the source code for all modules it contains; but, as a special
+exception, it need not include source code for modules which are standard
+libraries that accompany the operating system on which the executable
+file runs, or for standard header files or definitions files that
+accompany that operating system.
+
+@item
+You may not copy, modify, sublicense, distribute or transfer the
+Program except as expressly provided under this General Public License.
+Any attempt otherwise to copy, modify, sublicense, distribute or transfer
+the Program is void, and will automatically terminate your rights to use
+the Program under this License. However, parties who have received
+copies, or rights to use copies, from you under this General Public
+License will not have their licenses terminated so long as such parties
+remain in full compliance.
+
+@item
+By copying, distributing or modifying the Program (or any work based
+on the Program) you indicate your acceptance of this license to do so,
+and all its terms and conditions.
+
+@item
+Each time you redistribute the Program (or any work based on the
+Program), the recipient automatically receives a license from the original
+licensor to copy, distribute or modify the Program subject to these
+terms and conditions. You may not impose any further restrictions on the
+recipients' exercise of the rights granted herein.
+
+@item
+The Free Software Foundation may publish revised and/or new versions
+of the General Public License from time to time. Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+Each version is given a distinguishing version number. If the Program
+specifies a version number of the license which applies to it and ``any
+later version'', you have the option of following the terms and conditions
+either of that version or of any later version published by the Free
+Software Foundation. If the Program does not specify a version number of
+the license, you may choose any version ever published by the Free Software
+Foundation.
+
+@item
+If you wish to incorporate parts of the Program into other free
+programs whose distribution conditions are different, write to the author
+to ask for permission. For software which is copyrighted by the Free
+Software Foundation, write to the Free Software Foundation; we sometimes
+make exceptions for this. Our decision will be guided by the two goals
+of preserving the free status of all derivatives of our free software and
+of promoting the sharing and reuse of software generally.
+
+@iftex
+@heading NO WARRANTY
+@end iftex
+@ifinfo
+@center NO WARRANTY
+@end ifinfo
+
+@item
+BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
+FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
+OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
+PROVIDE THE PROGRAM ``AS IS'' WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
+OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
+TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
+PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
+REPAIR OR CORRECTION.
+
+@item
+IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL
+ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
+REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
+INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
+ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT
+LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES
+SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE
+WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
+@end enumerate
+
+@iftex
+@heading END OF TERMS AND CONDITIONS
+@end iftex
+@ifinfo
+@center END OF TERMS AND CONDITIONS
+@end ifinfo
+
+@page
+@unnumberedsec Appendix: How to Apply These Terms to Your New Programs
+
+ If you develop a new program, and you want it to be of the greatest
+possible use to humanity, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these
+terms.
+
+ To do so, attach the following notices to the program. It is safest to
+attach them to the start of each source file to most effectively convey
+the exclusion of warranty; and each file should have at least the
+``copyright'' line and a pointer to where the full notice is found.
+
+@smallexample
+@var{one line to give the program's name and a brief idea of what it does.}
+Copyright (C) 19@var{yy} @var{name of author}
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+@end smallexample
+
+Also add information on how to contact you by electronic and paper mail.
+
+If the program is interactive, make it output a short notice like this
+when it starts in an interactive mode:
+
+@smallexample
+Gnomovision version 69, Copyright (C) 19@var{yy} @var{name of author}
+Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+This is free software, and you are welcome to redistribute it
+under certain conditions; type `show c' for details.
+@end smallexample
+
+The hypothetical commands `show w' and `show c' should show the
+appropriate parts of the General Public License. Of course, the
+commands you use may be called something other than `show w' and `show
+c'; they could even be mouse-clicks or menu items---whatever suits your
+program.
+
+You should also get your employer (if you work as a programmer) or your
+school, if any, to sign a ``copyright disclaimer'' for the program, if
+necessary. Here a sample; alter the names:
+
+@example
+Yoyodyne, Inc., hereby disclaims all copyright interest in the
+program `Gnomovision' (a program to direct compilers to make passes
+at assemblers) written by James Hacker.
+
+@var{signature of Ty Coon}, 1 April 1989
+Ty Coon, President of Vice
+@end example
+
+That's all there is to it!
+
+@node Contributors, Motivation, Copying, Top
+@unnumbered Contributors to GNU @code{gperf} Utility
+
+@itemize @bullet
+@item
+The GNU @code{gperf} perfect hash function generator utility was
+originally written in GNU C++ by Douglas C. Schmidt. It is now also
+available in a highly-portable ``old-style'' C version. The general
+idea for the perfect hash function generator was inspired by Keith
+Bostic's algorithm written in C, and distributed to net.sources around
+1984. The current program is a heavily modified, enhanced, and extended
+implementation of Keith's basic idea, created at the University of
+California, Irvine. Bugs, patches, and suggestions should be reported
+to schmidt at ics.uci.edu.
+
+@item
+Special thanks are extended to Michael Tiemann and Doug Lea, for
+providing a useful compiler, and for giving me a forum to exhibit my
+creation.
+
+In addition, Adam de Boor and Nels Olson provided many tips and insights
+that greatly helped improve the quality and functionality of @code{gperf}.
+@end itemize
+
+@node Motivation, Search Structures, Contributors, Top
+@chapter Introduction
+
+@code{gperf} is a perfect hash function generator written in C++. It
+transforms an @emph{n} element user-specified keyword set @emph{W} into
+a perfect hash function @emph{F}. @emph{F} uniquely maps keywords in
+@emph{W} onto the range 0..@emph{k}, where @emph{k} >= @emph{n}. If
+@emph{k = n} then @emph{F} is a @emph{minimal} perfect hash function.
+@code{gperf} generates a 0..@emph{k} element static lookup table and a
+pair of C functions. These functions determine whether a given
+character string @emph{s} occurs in @emph{W}, using at most one probe
+into the lookup table.
+
+@code{gperf} currently generates the reserved keyword recognizer for
+lexical analyzers in several production and research compilers and
+language processing tools, including GNU C, GNU C++, GNU Pascal, GNU
+Modula 3, and GNU indent. Complete C++ source code for @code{gperf} is
+available via anonymous ftp from ics.uci.edu. @code{gperf} also is
+distributed along with the GNU libg++ library. A highly portable,
+functionally equivalent K&R C version of @code{gperf} is archived in
+comp.sources.unix, volume 20. Finally, a paper describing
+@code{gperf}'s design and implementation in greater detail is available
+in the Second USENIX C++ Conference proceedings.
+
+@node Search Structures, Description, Motivation, Top
+@chapter Static search structures and GNU @code{gperf}
+
+A @dfn{static search structure} is an Abstract Data Type with certain
+fundamental operations, @emph{e.g.}, @emph{initialize}, @emph{insert},
+and @emph{retrieve}. Conceptually, all insertions occur before any
+retrievals. In practice, @code{gperf} generates a @code{static} array
+containing search set keywords and any associated attributes specified
+by the user. Thus, there is essentially no execution-time cost for the
+insertions. It is a useful data structure for representing @emph{static
+search sets}. Static search sets occur frequently in software system
+applications. Typical static search sets include compiler reserved
+words, assembler instruction opcodes, and built-in shell interpreter
+commands. Search set members, called @dfn{keywords}, are inserted into
+the structure only once, usually during program initialization, and are
+not generally modified at run-time.
+
+Numerous static search structure implementations exist, @emph{e.g.},
+arrays, linked lists, binary search trees, digital search tries, and
+hash tables. Different approaches offer trade-offs between space
+utilization and search time efficiency. For example, an @emph{n} element
+sorted array is space efficient, though the average-case time
+complexity for retrieval operations using binary search is
+proportional to log @emph{n}. Conversely, hash table implementations
+often locate a table entry in constant time, but typically impose
+additional memory overhead and exhibit poor worst case performance.
+
+
+@emph{Minimal perfect hash functions} provide an optimal solution for a
+particular class of static search sets. A minimal perfect hash
+function is defined by two properties:
+
+@itemize @bullet
+@item
+It allows keyword recognition in a static search set using at most
+@emph{one} probe into the hash table. This represents the ``perfect''
+property.
+@item
+The actual memory allocated to store the keywords is precisely large
+enough for the keyword set, and @emph{no larger}. This is the
+``minimal'' property.
+@end itemize
+
+For most applications it is far easier to generate @emph{perfect} hash
+functions than @emph{minimal perfect} hash functions. Moreover,
+non-minimal perfect hash functions frequently execute faster than
+minimal ones in practice. This phenomenon occurs because searching a
+sparse keyword table increases the probability of locating a ``null''
+entry, thereby reducing string comparisons. @code{gperf}'s default
+behavior generates @emph{near-minimal} perfect hash functions for
+keyword sets. However, @code{gperf} provides many options that permit
+user control over the degree of minimality and perfection.
+
+Static search sets often exhibit relative stability over time. For
+example, Ada's 63 reserved words have remained constant for nearly a
+decade. It is therefore frequently worthwhile to expend concerted
+effort building an optimal search structure @emph{once}, if it
+subsequently receives heavy use multiple times. @code{gperf} removes
+the drudgery associated with constructing time- and space-efficient
+search structures by hand. It has proven a useful and practical tool
+for serious programming projects. Output from @code{gperf} is currently
+used in several production and research compilers, including GNU C, GNU
+C++, GNU Pascal, and GNU Modula 3. The latter two compilers are not yet
+part of the official GNU distribution. Each compiler utilizes
+@code{gperf} to automatically generate static search structures that
+efficiently identify their respective reserved keywords.
+
+@node Description, Options, Search Structures, Top
+@chapter High-Level Description of GNU @code{gperf}
+
+@menu
+* Input Format:: Input Format to @code{gperf}
+* Output Format:: Output Format for Generated C Code with @code{gperf}
+@end menu
+
+The perfect hash function generator @code{gperf} reads a set of
+``keywords'' from a @dfn{keyfile} (or from the standard input by
+default). It attempts to derive a perfect hashing function that
+recognizes a member of the @dfn{static keyword set} with at most a
+single probe into the lookup table. If @code{gperf} succeeds in
+generating such a function it produces a pair of C source code routines
+that perform hashing and table lookup recognition. All generated C code
+is directed to the standard output. Command-line options described
+below allow you to modify the input and output format to @code{gperf}.
+
+By default, @code{gperf} attempts to produce time-efficient code, with
+less emphasis on efficient space utilization. However, several options
+exist that permit trading-off execution time for storage space and vice
+versa. In particular, expanding the generated table size produces a
+sparse search structure, generally yielding faster searches.
+Conversely, you can direct @code{gperf} to utilize a C @code{switch}
+statement scheme that minimizes data space storage size. Furthermore,
+using a C @code{switch} may actually speed up the keyword retrieval time
+somewhat. Actual results depend on your C compiler, of course.
+
+In general, @code{gperf} assigns values to the characters it is using
+for hashing until some set of values gives each keyword a unique value.
+A helpful heuristic is that the larger the hash value range, the easier
+it is for @code{gperf} to find and generate a perfect hash function.
+Experimentation is the key to getting the most from @code{gperf}.
+
+@node Input Format, Output Format, Description, Description
+@section Input Format to @code{gperf}
+
+You can control the input keyfile format by varying certain command-line
+arguments, in particular the @samp{-t} option. The input's appearance
+is similar to GNU utilities @code{flex} and @code{bison} (or UNIX
+utilities @code{lex} and @code{yacc}). Here's an outline of the general
+format:
+
+@example
+@group
+declarations
+%%
+keywords
+%%
+functions
+@end group
+@end example
+
+@emph{Unlike} @code{flex} or @code{bison}, all sections of @code{gperf}'s input
+are optional. The following sections describe the input format for each
+section.
+
+@menu
+* Declarations:: @code{struct} Declarations and C Code Inclusion.
+* Keywords:: Format for Keyword Entries.
+* Functions:: Including Additional C Functions.
+@end menu
+
+@node Declarations, Keywords, Input Format, Input Format
+@subsection @code{struct} Declarations and C Code Inclusion
+
+The keyword input file optionally contains a section for including
+arbitrary C declarations and definitions, as well as provisions for
+providing a user-supplied @code{struct}. If the @samp{-t} option
+@emph{is} enabled, you @emph{must} provide a C @code{struct} as the last
+component in the declaration section of the keyfile. The first
+field in this struct must be a @code{char *} identifier called ``name,''
+although it is possible to modify this field's name with the @samp{-K}
+option described below.
+
+Here is a simple example, using months of the year and their attributes as
+input:
+
+@example
+@group
+struct months @{ char *name; int number; int days; int leap_days; @};
+%%
+january, 1, 31, 31
+february, 2, 28, 29
+march, 3, 31, 31
+april, 4, 30, 30
+may, 5, 31, 31
+june, 6, 30, 30
+july, 7, 31, 31
+august, 8, 31, 31
+september, 9, 30, 30
+october, 10, 31, 31
+november, 11, 30, 30
+december, 12, 31, 31
+@end group
+@end example
+
+Separating the @code{struct} declaration from the list of keywords and
+other fields is a pair of consecutive percent signs, @code{%%},
+appearing left justified in the first column, as in the UNIX utility
+@code{lex}.
+
+Using a syntax similar to GNU utilities @code{flex} and @code{bison}, it
+is possible to directly include C source text and comments verbatim into
+the generated output file. This is accomplished by enclosing the region
+inside left-justified surrounding @code{%@{}, @code{%@}} pairs. Here is
+an input fragment based on the previous example that illustrates this
+feature:
+
+@example
+@group
+%@{
+#include <assert.h>
+/* This section of code is inserted directly into the output. */
+int return_month_days (struct months *months, int is_leap_year);
+%@}
+struct months @{ char *name; int number; int days; int leap_days; @};
+%%
+january, 1, 31, 31
+february, 2, 28, 29
+march, 3, 31, 31
+...
+@end group
+@end example
+
+It is possible to omit the declaration section entirely. In this case
+the keyfile begins directly with the first keyword line, @emph{e.g.}:
+
+@example
+@group
+january, 1, 31, 31
+february, 2, 28, 29
+march, 3, 31, 31
+april, 4, 30, 30
+...
+@end group
+@end example
+
+@node Keywords, Functions, Declarations, Input Format
+@subsection Format for Keyword Entries
+
+The second keyfile format section contains lines of keywords and any
+associated attributes you might supply. A line beginning with @samp{#}
+in the first column is considered a comment. Everything following the
+@samp{#} is ignored, up to and including the following newline.
+
+The first field of each non-comment line is always the key itself. It
+should be given as a simple name, @emph{i.e.}, without surrounding
+string quotation marks, and be left-justified flush against the first
+column. In this context, a ``field'' is considered to extend up to, but
+not include, the first blank, comma, or newline. Here is a simple
+example taken from a partial list of C reserved words:
+
+@example
+@group
+# These are a few C reserved words, see the c.gperf file
+# for a complete list of ANSI C reserved words.
+unsigned
+sizeof
+switch
+signed
+if
+default
+for
+while
+return
+@end group
+@end example
+
+Note that unlike @code{flex} or @code{bison} the first @code{%%} marker
+may be elided if the declaration section is empty.
+
+Additional fields may optionally follow the leading keyword. Fields
+should be separated by commas, and terminate at the end of line. What
+these fields mean is entirely up to you; they are used to initialize the
+elements of the user-defined @code{struct} provided by you in the
+declaration section. If the @samp{-t} option is @emph{not} enabled
+these fields are simply ignored. All previous examples except the last
+one contain keyword attributes.
+
+@node Functions, , Keywords, Input Format
+@subsection Including Additional C Functions
+
+The optional third section also corresponds closely with conventions
+found in @code{flex} and @code{bison}. All text in this section,
+starting at the final @code{%%} and extending to the end of the input
+file, is included verbatim into the generated output file. Naturally,
+it is your responsibility to ensure that the code contained in this
+section is valid C.
+
+@node Output Format, , Input Format, Description
+@section Output Format for Generated C Code with @code{gperf}
+
+Several options control how the generated C code appears on the standard
+output. Two C functions are generated. They are called @code{hash} and
+@code{in_word_set}, although you may modify the name for
+@code{in_word_set} with a command-line option. Both functions require
+two arguments, a string, @code{char *} @var{str}, and a length
+parameter, @code{int} @var{len}. Their default function prototypes are
+as follows:
+
+@example
+@group
+static int hash (char *str, int len);
+int in_word_set (char *str, int len);
+@end group
+@end example
+
+By default, the generated @code{hash} function returns an integer value
+created by adding @var{len} to several user-specified @var{str} key
+positions indexed into an @dfn{associated values} table stored in a
+local static array. The associated values table is constructed
+internally by @code{gperf} and later output as a static local C array called
+@var{hash_table}; its meaning and properties are described below.
+@xref{Implementation}. The relevant key positions are specified via the
+@samp{-k} option when running @code{gperf}, as detailed in the @emph{Options}
+section below. @xref{Options}.
+
+Two options, @samp{-g} (assume you are compiling with GNU C and its
+@code{inline} feature) and @samp{-a} (assume ANSI C-style function
+prototypes), alter the content of both the generated @code{hash} and
+@code{in_word_set} routines. However, function @code{in_word_set} may
+be modified more extensively, in response to your option settings. The
+options that affect the @code{in_word_set} structure are:
+
+@itemize @bullet
+@table @samp
+@item -p
+Have function @code{in_word_set} return a pointer rather than a boolean.
+
+@item -t
+Make use of the user-defined @code{struct}.
+
+@item -S @var{total switch statements}
+Generate 1 or more C @code{switch} statements rather than use a large,
+(and potentially sparse) static array. Although the exact time and
+space savings of this approach vary according to your C compiler's
+degree of optimization, this method often results in smaller and faster
+code.
+@end table
+@end itemize
+
+If the @samp{-t}, @samp{-S}, and @samp{-p} options are omitted the
+default action is to generate a @code{char *} array containing the keys,
+together with additional null strings used for padding the array. By
+experimenting with the various input and output options, and timing the
+resulting C code, you can determine the best option choices for
+different keyword set characteristics.
+
+@node Options, Bugs, Description, Top
+@chapter Options to the @code{gperf} Utility
+
+There are @emph{many} options to @code{gperf}. They were added to make
+the program more convenient for use with real applications. ``On-line''
+help is readily available via the @samp{-h} option. Other options
+include:
+
+@itemize @bullet
+@table @samp
+@item -a
+Generate ANSI Standard C code using function prototypes. The default is
+to use ``classic'' K&R C function declaration syntax.
+
+@item -c
+Generates C code that uses the @code{strncmp} function to perform
+string comparisons. The default action is to use @code{strcmp}.
+
+@item -C
+Makes the contents of all generated lookup tables constant, @emph{i.e.},
+``readonly.'' Many compilers can generate more efficient code for this
+by putting the tables in readonly memory.
+
+@item -d
+Enables the debugging option. This produces verbose diagnostics to
+``standard error'' when @code{gperf} is executing. It is useful both for
+maintaining the program and for determining whether a given set of
+options is actually speeding up the search for a solution. Some useful
+information is dumped at the end of the program when the @samp{-d}
+option is enabled.
+
+@item -D
+Handle keywords whose key position sets hash to duplicate values.
+Duplicate hash values occur for two reasons:
+
+@itemize @bullet
+@item
+Since @code{gperf} does not backtrack it is possible for it to process
+all your input keywords without finding a unique mapping for each word.
+However, frequently only a very small number of duplicates occur, and
+the majority of keys still require one probe into the table.
+@item
+Sometimes a set of keys may have the same names, but possess different
+attributes. With the -D option @code{gperf} treats all these keys as part of
+an equivalence class and generates a perfect hash function with multiple
+comparisons for duplicate keys. It is up to you to completely
+disambiguate the keywords by modifying the generated C code. However,
+@code{gperf} helps you out by organizing the output.
+@end itemize
+
+Option @samp{-D} is extremely useful for certain large or highly
+redundant keyword sets, @emph{e.g.}, assembler instruction opcodes.
+Using this option usually means that the generated hash function is no
+longer perfect. On the other hand, it permits @code{gperf} to work on
+keyword sets that it otherwise could not handle.
+
+@item -e @var{keyword delimiter list}
+Allows the user to provide a string containing delimiters used to
+separate keywords from their attributes. The default is ",\n". This
+option is essential if you want to use keywords that have embedded
+commas or newlines. One useful trick is to use -e'TAB', where TAB is
+the literal tab character.
+
+@item -E
+Define constant values using an enum local to the lookup function rather
+than with #defines. This also means that different lookup functions can
+reside in the same file. Thanks to James Clark (jjc at ai.mit.edu).
+
+@item -f @var{iteration amount}
+Generate the perfect hash function ``fast.'' This decreases @code{gperf}'s
+running time at the cost of minimizing generated table-size. The
+iteration amount represents the number of times to iterate when
+resolving a collision. `0' means `iterate by the number of keywords'.
+This option is probably most useful when used in conjunction with options
+@samp{-D} and/or @samp{-S} for @emph{large} keyword sets.
+
+@item -g
+Assume a GNU compiler, @emph{e.g.}, @code{g++} or @code{gcc}. This
+makes all generated routines use the ``inline'' keyword to remove the
+cost of function calls. Note that @samp{-g} does @emph{not} imply
+@samp{-a}, since other non-ANSI C compilers may have provisions for a
+function @code{inline} feature.
+
+@item -G
+Generate the static table of keywords as a static global variable,
+rather than hiding it inside of the lookup function (which is the
+default behavior).
+
+@item -h
+Prints a short summary on the meaning of each program option. Aborts
+further program execution.
+
+@item -H @var{hash function name}
+Allows you to specify the name for the generated hash function. Default
+name is `hash.' This option permits the use of two hash tables in the
+same file.
+
+@item -i @var{initial value}
+Provides an initial @var{value} for the associated values array. Default
+is 0. Increasing the initial value helps inflate the final table size,
+possibly leading to more time efficient keyword lookups. Note that this
+option is not particularly useful when @samp{-S} is used. Also,
+@samp{-i} is overridden when the @samp{-r} option is used.
+
+@item -j @var{jump value}
+Affects the ``jump value,'' @emph{i.e.}, how far to advance the
+associated character value upon collisions. @var{Jump value} is rounded
+up to an odd number, the default is 5. If the @var{jump value} is 0 @code{gperf}
+jumps by random amounts.
+
+@item -k @var{keys}
+Allows selection of the character key positions used in the keywords'
+hash function. The allowable choices range between 1-126, inclusive.
+The positions are separated by commas, @emph{e.g.}, @samp{-k 9,4,13,14};
+ranges may be used, @emph{e.g.}, @samp{-k 2-7}; and positions may occur
+in any order. Furthermore, the meta-character '*' causes the generated
+hash function to consider @strong{all} character positions in each key,
+whereas '$' instructs the hash function to use the ``final character''
+of a key (this is the only way to use a character position greater than
+126, incidentally).
+
+For instance, the option @samp{-k 1,2,4,6-10,'$'} generates a hash
+function that considers positions 1,2,4,6,7,8,9,10, plus the last
+character in each key (which may differ for each key, obviously). Keys
+with length less than the indicated key positions work properly, since
+selected key positions exceeding the key length are simply not
+referenced in the hash function.
+
+@item -K @var{key name}
+By default, the program assumes the structure component identifier for
+the keyword is ``name.'' This option allows an arbitrary choice of
+identifier for this component, although it still must occur as the first
+field in your supplied @code{struct}.
+
+@item -l
+Compare key lengths before trying a string comparison. This might cut
+down on the number of string comparisons made during the lookup, since
+keys with different lengths are never compared via @code{strcmp}.
+However, using @samp{-l} might greatly increase the size of the
+generated C code if the lookup table range is large (which implies that
+the switch option @samp{-S} is not enabled), since the length table
+contains as many elements as there are entries in the lookup table.
+
+@item -L @var{generated language name}
+Instructs @code{gperf} to generate code in the language specified by the
+option's argument. Languages handled are currently C++ and C. The
+default is C.
+
+@item -n
+Instructs the generator not to include the length of a keyword when
+computing its hash value. This may save a few assembly instructions in
+the generated lookup table.
+
+@item -N @var{lookup function name}
+Allows you to specify the name for the generated lookup function.
+Default name is `in_word_set.' This option permits completely automatic
+generation of perfect hash functions, especially when multiple generated
+hash functions are used in the same application.
+
+@item -o
+Reorders the keywords by sorting the keywords so that frequently
+occurring key position set components appear first. A second reordering
+pass follows so that keys with ``already determined values'' are placed
+towards the front of the keylist. This may decrease the time required
+to generate a perfect hash function for many keyword sets, and also
+produce more minimal perfect hash functions. The reason for this is
+that the reordering helps prune the search time by handling inevitable
+collisions early in the search process. On the other hand, if the
+number of keywords is @emph{very} large using @samp{-o} may
+@emph{increase} @code{gperf}'s execution time, since collisions will begin
+earlier and continue throughout the remainder of keyword processing.
+See Cichelli's paper from the January 1980 Communications of the ACM for
+details.
+
+@item -p
+Changes the return value of the generated function @code{in_word_set}
+from boolean (@emph{i.e.}, 0 or 1), to either type ``pointer to
+user-defined struct,'' (if the @samp{-t} option is enabled), or simply
+to @code{char *}, if @samp{-t} is not enabled. This option is most
+useful when the @samp{-t} option (allowing user-defined structs) is
+used. For example, it is possible to automatically generate the GNU C
+reserved word lookup routine with the options @samp{-p} and @samp{-t}.
+
+@item -r
+Utilizes randomness to initialize the associated values table. This
+frequently generates solutions faster than using deterministic
+initialization (which starts all associated values at 0). Furthermore,
+using the randomization option generally increases the size of the
+table. If @code{gperf} has difficulty with a certain keyword set try using
+@samp{-r} or @samp{-D}.
+
+@item -s @var{size-multiple}
+Affects the size of the generated hash table. The numeric argument for
+this option indicates ``how many times larger or smaller'' the maximum
+associated value range should be, in relationship to the number of keys.
+If the @var{size-multiple} is negative the maximum associated value is
+calculated by @emph{dividing} it into the total number of keys. For
+example, a value of 3 means ``allow the maximum associated value to be
+about 3 times larger than the number of input keys.''
+
+Conversely, a value of -3 means ``allow the maximum associated value to
+be about 3 times smaller than the number of input keys.'' Negative
+values are useful for limiting the overall size of the generated hash
+table, though this usually increases the number of duplicate hash
+values.
+
+If `generate switch' option @samp{-S} is @emph{not} enabled, the maximum
+associated value influences the static array table size, and a larger
+table should decrease the time required for an unsuccessful search, at
+the expense of extra table space.
+
+The default value is 1, thus the default maximum associated value is about
+the same size as the number of keys (for efficiency, the maximum
+associated value is always rounded up to a power of 2). The actual
+table size may vary somewhat, since this technique is essentially a
+heuristic. In particular, setting this value too high slows down
+@code{gperf}'s runtime, since it must search through a much larger range
+of values. Judicious use of the @samp{-f} option helps alleviate this
+overhead, however.
+
+@item -S @var{total switch statements}
+Causes the generated C code to use a @code{switch} statement scheme,
+rather than an array lookup table. This can lead to a reduction in both
+time and space requirements for some keyfiles. The argument to this
+option determines how many @code{switch} statements are generated. A
+value of 1 generates 1 @code{switch} containing all the elements, a
+value of 2 generates 2 tables with 1/2 the elements in each
+@code{switch}, etc. This is useful since many C compilers cannot
+correctly generate code for large @code{switch} statements. This option
+was inspired in part by Keith Bostic's original C program.
+
+@item -t
+Allows you to include a @code{struct} type declaration for generated
+code. Any text before a pair of consecutive %% is considered part of the
+type declaration. Key words and additional fields may follow this, one
+group of fields per line. A set of examples for generating perfect hash
+tables and functions for Ada, C, G++, Pascal, and Modula 2 and 3
+reserved words is distributed with this release.
+
+@item -T
+Prevents the transfer of the type declaration to the output file. Use
+this option if the type is already defined elsewhere.
+
+@item -v
+Prints out the current version number.
+
+@item -Z @var{class name}
+Allow user to specify name of generated C++ class. Default name is
+@code{Perfect_Hash}.
+@end table
+@end itemize
+
+@node Bugs, Projects, Options, Top
+@chapter Known Bugs and Limitations with @code{gperf}
+
+The following are some limitations with the current release of
+@code{gperf}:
+
+@itemize @bullet
+@item
+The @code{gperf} utility is tuned to execute quickly, and works quickly
+for small to medium size data sets (around 1000 keywords). It is
+extremely useful for maintaining perfect hash functions for compiler
+keyword sets. Several recent enhancements now enable @code{gperf} to
+work efficiently on much larger keyword sets (over 15,000 keywords).
+When processing large keyword sets it helps greatly to have over 8 megs
+of RAM.
+
+However, since @code{gperf} does not backtrack no guaranteed solution
+occurs on every run. On the other hand, it is usually easy to obtain a
+solution by varying the option parameters. In particular, try the
+@samp{-r} option, and also try changing the default arguments to the
+@samp{-s} and @samp{-j} options. To @emph{guarantee} a solution, use
+the @samp{-D} and @samp{-S} options, although the final results are not
+likely to be a @emph{perfect} hash function anymore! Finally, use the
+@samp{-f} option if you want @code{gperf} to generate the perfect hash
+function @emph{fast}, with less emphasis on making it minimal.
+
+@item
+The size of the generated static keyword array can get @emph{extremely}
+large if the input keyword file is large or if the keywords are quite
+similar. This tends to slow down the compilation of the generated C
+code, and @emph{greatly} inflates the object code size. If this
+situation occurs, consider using the @samp{-S} option to reduce data
+size, potentially increasing keyword recognition time a negligible
+amount. Since many C compilers cannot correctly generate code for
+large switch statements it is important to qualify the @samp{-S} option
+with an appropriate numerical argument that controls the number of
+switch statements generated.
+
+@item
+The maximum number of key positions selected for a given key has an
+arbitrary limit of 126. This restriction should be removed, and if
+anyone considers this a problem write me and let me know so I can remove
+the constraint.
+
+@item
+The C++ source code only compiles correctly with GNU G++, version 1.36
+(and hopefully later versions). Porting to AT&T cfront would be
+tedious, but possible (and desirable). There is also a K&R C version
+available now. This should compile without change on most BSD systems,
+but may require a bit of work to run on SYSV, since @code{gperf} uses
+@var{alloca} in several places. Send mail to schmidt at ics.uci.edu for
+information.
+@end itemize
+
+@node Projects, Implementation, Bugs, Top
+@chapter Things Still Left to Do
+
+It should be ``relatively'' easy to replace the current perfect hash
+function algorithm with a more exhaustive approach; the perfect hash
+module is essentially independent from other program modules. Additional
+worthwhile improvements include:
+
+@itemize @bullet
+@item
+Make the algorithm more robust. At present, the program halts with an
+error diagnostic if it can't find a direct solution and the @samp{-D}
+option is not enabled. A more comprehensive, albeit computationally
+expensive, approach would employ backtracking or enable alternative
+options and retry. It's not clear how helpful this would be, in
+general, since most search sets are rather small in practice.
+
+@item
+Another useful extension involves modifying the program to generate
+``minimal'' perfect hash functions (under certain circumstances, the
+current version can be rather extravagant in the generated table size).
+Again, this is mostly of theoretical interest, since a sparse table
+often produces faster lookups, and use of the @samp{-S} @code{switch}
+option can minimize the data size, at the expense of slightly longer
+lookups (note that the gcc compiler generally produces good code for
+@code{switch} statements, reducing the need for more complex schemes).
+
+@item
+In addition to improving the algorithm, it would also be useful to
+generate a C++ class or Ada package as the code output, in addition to
+the current C routines.
+@end itemize
+
+@node Implementation, Bibliography, Projects, Top
+@chapter Implementation Details of GNU @code{gperf}
+
+A paper giving a high-level description of the data structures and
+algorithms used to implement @code{gperf} will soon be available. This
+paper is useful not only from a maintenance and enhancement perspective,
+but also because it demonstrates several clever and useful programming
+techniques, @emph{e.g.}, `Iteration Number' boolean arrays, double
+hashing, a ``safe'' and efficient method for reading arbitrarily long
+input from a file, and a provably optimal algorithm for simultaneously
+determining both the minimum and maximum elements in a list.
+
+@page
+
+@node Bibliography, , Implementation, Top
+@chapter Bibliography
+
+[1] Chang, C.C.: @i{A Scheme for Constructing Ordered Minimal Perfect
+Hashing Functions} Information Sciences 39(1986), 187-195.
+
+[2] Cichelli, Richard J. @i{Author's Response to ``On Cichelli's Minimal Perfect Hash
+Functions Method''} Communications of the ACM, 23, 12(December 1980), 729.
+
+[3] Cichelli, Richard J. @i{Minimal Perfect Hash Functions Made Simple}
+Communications of the ACM, 23, 1(January 1980), 17-19.
+
+[4] Cook, C. R. and Oldehoeft, R.R. @i{A Letter Oriented Minimal
+Perfect Hashing Function} SIGPLAN Notices, 17, 9(September 1982), 18-27.
+
+[5] Cormack, G. V. and Horspool, R. N. S. and Kaiserwerth, M.
+@i{Practical Perfect Hashing} Computer Journal, 28, 1(January 1985), 54-58.
+
+[6] Jaeschke, G. @i{Reciprocal Hashing: A Method for Generating Minimal
+Perfect Hashing Functions} Communications of the ACM, 24, 12(December
+1981), 829-833.
+
+[7] Jaeschke, G. and Osterburg, G. @i{On Cichelli's Minimal Perfect
+Hash Functions Method} Communications of the ACM, 23, 12(December 1980),
+728-729.
+
+[8] Sager, Thomas J. @i{A Polynomial Time Generator for Minimal Perfect
+Hash Functions} Communications of the ACM, 28, 5(May 1985), 523-532.
+
+[9] Schmidt, Douglas C. @i{GPERF: A Perfect Hash Function Generator}
+Second USENIX C++ Conference Proceedings, April 1990.
+
+[10] Sebesta, R.W. and Taylor, M.A. @i{Minimal Perfect Hash Functions
+for Reserved Word Lists} SIGPLAN Notices, 20, 12(December 1985), 47-53.
+
+[11] Sprugnoli, R. @i{Perfect Hashing Functions: A Single Probe
+Retrieving Method for Static Sets} Communications of the ACM, 20,
+11(November 1977), 841-850.
+
+[12] Stallman, Richard M. @i{Using and Porting GNU CC} Free Software Foundation,
+1988.
+
+[13] Stroustrup, Bjarne @i{The C++ Programming Language.} Addison-Wesley, 1986.
+
+[14] Tiemann, Michael D. @i{User's Guide to GNU C++} Free Software
+Foundation, 1989.
+
+@contents
+@bye
diff --git a/apps/gperf/src/Bool_Array.cpp b/apps/gperf/src/Bool_Array.cpp
new file mode 100644
index 00000000000..e3243565f41
--- /dev/null
+++ b/apps/gperf/src/Bool_Array.cpp
@@ -0,0 +1,89 @@
+/* Fast lookup table abstraction implemented as an Iteration Number Array
+// @(#)Bool_Array.cpp 1.1 10/18/96
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "Bool_Array.h"
+
+// Destructor: when the DEBUG option is on, report the array size and
+// the current generation ("iteration") number on stderr.  The storage
+// buffer is not released here.
+
+Bool_Array::~Bool_Array (void)
+{
+  if (!option[DEBUG])
+    return;
+
+  fprintf (stderr,
+           "\ndumping boolean array information\n"
+           "size = %d\niteration number = %d\nend of array dump\n",
+           size, generation_number);
+}
+
+// Default constructor: no storage attached, generation 0, size 0.
+// find () indexes storage_array, so init () has to run before use.
+Bool_Array::Bool_Array (void)
+  : storage_array (0), generation_number (0), size (0)
+{
+}
+
+// Attach BUFFER as the backing store, record S as its element count,
+// zero all S elements, and start counting generations at 1 so that a
+// zeroed slot never equals the current generation.  BUFFER is supplied
+// by the caller and must have room for at least S elements.
+void
+Bool_Array::init (STORAGE_TYPE *buffer, STORAGE_TYPE s)
+{
+  size = s;
+  generation_number = 1;
+  storage_array = buffer;
+
+  memset (storage_array, 0, s * sizeof *storage_array);
+
+  if (option[DEBUG])
+    // The byte count is computed with sizeof and therefore has type
+    // size_t; convert it explicitly so the argument matches "%d" on
+    // platforms where size_t is wider than int.
+    fprintf (stderr, "\nbool array size = %d, total bytes = %d\n",
+             size, (int) (size * sizeof *storage_array));
+}
+
+// Test-and-set: returns 1 if slot INDEX already carries the current
+// generation number; otherwise stamps it with the current generation
+// and returns 0.  INDEX is not range-checked.
+int
+Bool_Array::find (int index)
+{
+  STORAGE_TYPE &slot = storage_array[index];
+
+  if (slot == generation_number)
+    return 1;
+
+  slot = generation_number;
+  return 0;
+}
+
+// Unmark every slot by advancing the generation number.  Only when the
+// incremented counter equals 0 is the array zeroed again and the
+// counter restarted at 1 (with progress messages on stderr if the
+// DEBUG option is on).
+void
+Bool_Array::reset (void)
+{
+  if (++generation_number != 0)
+    return;
+
+  if (option[DEBUG])
+    {
+      fprintf (stderr, "(re-initializing bool_array)...");
+      fflush (stderr);
+    }
+
+  memset (storage_array, 0, size * sizeof *storage_array);
+  generation_number = 1;
+
+  if (option[DEBUG])
+    {
+      fprintf (stderr, "done\n");
+      fflush (stderr);
+    }
+}
+
diff --git a/apps/gperf/src/Bool_Array.h b/apps/gperf/src/Bool_Array.h
new file mode 100644
index 00000000000..d890484e485
--- /dev/null
+++ b/apps/gperf/src/Bool_Array.h
@@ -0,0 +1,65 @@
+/* -*- C++ -*- */
+// @(#)Bool_Array.h 1.1 10/18/96
+
+/* This may look like C code, but it is really -*- C++ -*- */
+
+/* Simple lookup table abstraction implemented as a Generation Number Array.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.*/
+
+/* Define and implement a simple boolean array abstraction,
+ using a Generation Numbering implementation to save on initialization time. */
+
+#ifndef bool_array_h
+#define bool_array_h 1
+
+#include "Options.h"
+
+#ifdef LO_CAL
+/* If we are on a memory diet then we'll only make these use a limited
+ amount of storage space. */
+typedef u_short STORAGE_TYPE;
+#else
+typedef int STORAGE_TYPE;
+#endif
+
+class Bool_Array // Set of marked integer slots; all marks are cleared by advancing a generation number.
+{
+public:
+ Bool_Array (void); // No storage attached yet; init () must be called before use.
+ ~Bool_Array (void); // Prints size and generation diagnostics when option[DEBUG] is set.
+
+ void init (STORAGE_TYPE *buffer, STORAGE_TYPE s); // Adopt the caller-supplied BUFFER of S elements and zero it.
+ int find (int hash_value); // Returns 1 if HASH_VALUE is already marked; otherwise marks it and returns 0.
+ void reset (void); // Unmark everything by incrementing the generation number.
+
+private:
+ STORAGE_TYPE *storage_array;
+ // Caller-supplied buffer; each slot holds the generation in which it was last marked.
+
+ STORAGE_TYPE generation_number;
+ // Current generation; a slot counts as marked only when it equals this value.
+
+ int size;
+ // Number of elements in storage_array.
+};
+
+
+#endif
diff --git a/apps/gperf/src/Gen_Perf.cpp b/apps/gperf/src/Gen_Perf.cpp
new file mode 100644
index 00000000000..25c0299fd35
--- /dev/null
+++ b/apps/gperf/src/Gen_Perf.cpp
@@ -0,0 +1,345 @@
+/* Provides high-level routines to manipulate the keyword list
+// @(#)Gen_Perf.cpp 1.1 10/18/96
+
+ structures the code generation output.
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "Vectors.h"
+#include "Gen_Perf.h"
+
+/* Current release version. */
+extern char *version_string;
+
+/* Reads input keys, possibly applies the reordering heuristic, sets
+ the maximum associated value size (rounded up to the nearest power
+ of 2), may initialize the associated values array, and determines
+ the maximum hash table size. Note: using the random numbers is
+ often helpful, though not as deterministic, of course!
+
+ Also resets the bookkeeping counters (num_done = 1,
+ fewest_collisions = 0), writes the opening comment of the generated
+ file together with the option summary to standard output, and, when
+ the DEBUG option is on, reports the computed sizes on stderr.
+
+ NOTE(review): only the value stored back into the options is rounded
+ by ACE_POW; the local asso_value_max stays unrounded, and that local
+ is what the random initialization masks with and what the debug
+ message prints.  Confirm this is intended. */
+
+Gen_Perf::Gen_Perf (void)
+{
+ int asso_value_max;
+ int non_linked_length;
+
+ this->key_list.read_keys ();
+ if (option[ORDER])
+ this->key_list.reorder ();
+ asso_value_max = option.get_asso_max (); // At this point: the size multiple (0, positive or negative), see below.
+ non_linked_length = this->key_list.keyword_list_length ();
+ num_done = 1;
+ fewest_collisions = 0;
+ if (asso_value_max == 0)
+ asso_value_max = non_linked_length;
+ else if (asso_value_max > 0)
+ asso_value_max *= non_linked_length;
+ else /* if (asso_value_max < 0) */
+ asso_value_max = non_linked_length / -asso_value_max;
+ option.set_asso_max (ACE_POW (asso_value_max)); // The local is left unchanged by this call.
+
+ if (option[RANDOM])
+ {
+ srand (time (0));
+
+ for (int i = 0; i < ALPHA_SIZE; i++)
+ Vectors::asso_values[i] = (rand () & asso_value_max - 1); // '-' binds tighter than '&': rand () & (asso_value_max - 1).
+ }
+ else
+ {
+ int asso_value = option.initial_value ();
+
+ if (asso_value) /* Initialize array if user requests non-zero default. */
+ for (int i = ALPHA_SIZE - 1; i >= 0; i--)
+ Vectors::asso_values[i] = asso_value & option.get_asso_max () - 1; // Parsed as asso_value & (get_asso_max () - 1).
+ }
+ max_hash_value = this->key_list.max_key_length () + option.get_asso_max () *
+ option.get_max_keysig_size (); // Longest key length plus one maximal associated value per selected key position.
+
+ printf ("/* ");
+ if (option[C])
+ printf ("C");
+ else if (option[CPLUSPLUS])
+ printf ("C++");
+ printf (" code produced by gperf version %s */\n", version_string);
+ Options::print_options ();
+
+ if (option[DEBUG])
+ fprintf (stderr, "total non-linked keys = %d\nmaximum associated value is %d"
+ "\nmaximum size of generated hash table is %d\n",
+ non_linked_length, asso_value_max, max_hash_value);
+}
+
+/* Merge the two ordered, NUL-terminated key sets SET_1 and SET_2 into
+   SET_3 and return the number of characters stored (SET_3 is also
+   NUL-terminated).
+
+   While both inputs have characters left, equal heads are dropped from
+   both sides and otherwise the smaller head is taken; once one input is
+   exhausted the rest of the other is taken.  A taken character is kept
+   only if it differs from the character stored just before it, so runs
+   of repeated characters collapse to one.
+
+   Precondition: both SET_1 and SET_2 must be ordered.  SET_3 must have
+   room for the combined length plus the terminator.  */
+
+inline int
+Gen_Perf::compute_disjoint_union (char *set_1, char *set_2, char *set_3)
+{
+  char *const start = set_3;
+
+  for (;;)
+    {
+      char next;
+
+      if (*set_1 && *set_2)
+        {
+          if (*set_1 == *set_2)
+            {
+              /* Present in both sets: contributes nothing.  */
+              set_1++;
+              set_2++;
+              continue;
+            }
+          next = *set_1 < *set_2 ? *set_1++ : *set_2++;
+        }
+      else if (*set_1)
+        next = *set_1++;
+      else if (*set_2)
+        next = *set_2++;
+      else
+        break;
+
+      /* Store the candidate, but only advance past it when it is not a
+         repeat of the previously stored character.  */
+      *set_3 = next;
+      if (set_3 == start || *set_3 != *(set_3 - 1))
+        set_3++;
+    }
+
+  *set_3 = '\0';
+  return set_3 - start;
+}
+
+/* Sort the first LEN characters of UNION_SET in place by increasing
+   Vectors::occurrences count.  This speeds up later processing since
+   the least frequently occurring characters come first.  Uses
+   insertion sort, since the UNION_SET is typically short.
+
+   The insertion position is an int rather than a char so that sets
+   longer than CHAR_MAX elements are handled: a char position would
+   wrap to a negative value and the final store would land outside the
+   array.  */
+
+inline void
+Gen_Perf::sort_set (char *union_set, int len)
+{
+  for (int i = 0; i < len - 1; i++)
+    {
+      int curr = i + 1;                  /* Slot currently being filled.  */
+      const char tmp = union_set[curr];  /* Element being inserted.  */
+
+      /* Shift every element that occurs more often than TMP one slot
+         to the right.  */
+      for (;
+           curr > 0 && Vectors::occurrences[tmp] < Vectors::occurrences[union_set[curr - 1]];
+           curr--)
+        union_set[curr] = union_set[curr - 1];
+
+      union_set[curr] = tmp;
+    }
+}
+
+/* Compute the hash value of KEY_NODE's key set: the key length (left
+   out when the NOLENGTH option is on) plus the associated value of
+   every character in its char_set.  The result is stored in
+   KEY_NODE->hash_value and returned.  */
+
+inline int
+Gen_Perf::hash (List_Node *key_node)
+{
+  int total = 0;
+
+  if (!option[NOLENGTH])
+    total = key_node->length;
+
+  char *cursor = key_node->char_set;
+
+  while (*cursor)
+    {
+      total += Vectors::asso_values[*cursor];
+      cursor++;
+    }
+
+  key_node->hash_value = total;
+  return key_node->hash_value;
+}
+
+/* Find out how character value change affects successfully hashed
+ items. Returns FALSE if no other hash values are affected, else
+ returns TRUE. Note that because Option.Get_Asso_Max is a power of
+ two we can guarantee that all legal Vectors::Asso_Values are visited without
+ repetition since Option.Get_Jump was forced to be an odd value!
+
+ In detail: the associated value of C is stepped repeatedly; after
+ each step the keywords from the head of the list up to CURR are
+ re-hashed.  A step is accepted (return 0) when CURR is reached with
+ fewer than FEWEST_COLLISIONS collisions, in which case the new value
+ is kept and FEWEST_COLLISIONS is lowered to the count just seen.  If
+ no step is accepted the original associated value of C is restored
+ and 1 is returned.  Either way the hash_value fields of the keywords
+ scanned are overwritten by hash (). */
+
+inline int
+Gen_Perf::affects_prev (char c, List_Node *curr)
+{
+ int original_char = Vectors::asso_values[c]; // Saved so the value can be restored on failure.
+ int total_iterations = !option[FAST]
+ ? option.get_asso_max () : option.get_iterations () ? option.get_iterations () : this->key_list.keyword_list_length (); // Without FAST: asso_max tries; with FAST: the iteration option, or the keyword count when that option is 0.
+
+ /* Try all legal associated values. */
+
+ for (int i = total_iterations - 1; i >= 0; i--)
+ {
+ int collisions = 0;
+
+ Vectors::asso_values[c] = Vectors::asso_values[c] + (option.get_jump () ? option.get_jump () : rand ())
+ & option.get_asso_max () - 1; // Parsed as (old + step) & (asso_max - 1); a jump of 0 selects a random step.
+
+ /* Iteration Number array is a win, O(1) initialization time! */
+ this->char_search.reset ();
+
+ /* See how this asso_value change affects previous keywords. If
+ it does better than before we'll take it!  The loop condition is
+ evaluated before the body: it marks the keyword's hash value and,
+ on a repeat, counts a collision and stops once the count reaches
+ fewest_collisions. */
+
+ for (List_Node *ptr = this->key_list.head;
+ !this->char_search.find (hash (ptr)) || ++collisions < fewest_collisions;
+ ptr = ptr->next)
+ if (ptr == curr)
+ {
+ fewest_collisions = collisions;
+ if (option[DEBUG])
+ fprintf (stderr, "- resolved after %d iterations", total_iterations - i);
+ return 0;
+ }
+ }
+
+ /* Restore original values, no more tries. */
+ Vectors::asso_values[c] = original_char;
+ /* If we're this far it's time to try the next character.... */
+ return 1;
+}
+
+/* Change a character value, try least-used characters first.
+
+   Called when CURR hashes to the same value as the earlier keyword
+   PRIOR.  The characters that distinguish the two key sets are
+   gathered with compute_disjoint_union, ordered by sort_set, and
+   handed one at a time to affects_prev.  The first character for
+   which affects_prev returns 0 wins and the function returns.  If no
+   character works, the hash values of every keyword up to and
+   including CURR are recomputed (affects_prev leaves them reflecting
+   its last trial) and processing continues with the collision left in
+   place.  */
+
+void
+Gen_Perf::change (List_Node *prior, List_Node *curr)
+{
+  /* Scratch buffer shared by all calls: allocated on first use and
+     never released.  */
+  static char *union_set;
+
+  if (!union_set)
+    union_set = new char [2 * option.get_max_keysig_size () + 1];
+
+  if (option[DEBUG])
+    {
+      fprintf (stderr, "collision on keyword #%d, prior = \"%s\", curr = \"%s\" hash = %d\n",
+               num_done, prior->key, curr->key, curr->hash_value);
+      fflush (stderr);
+    }
+  sort_set (union_set, compute_disjoint_union (prior->char_set, curr->char_set, union_set));
+
+  /* Try changing some values, if change doesn't alter other values continue normal action. */
+  fewest_collisions++;
+
+  for (char *temp = union_set; *temp; temp++)
+    if (!affects_prev (*temp, curr))
+      {
+        if (option[DEBUG])
+          {
+            /* The pointer difference has type ptrdiff_t; convert it
+               so that the argument matches the "%d" conversion.  */
+            fprintf (stderr, " by changing asso_value['%c'] (char #%d) to %d\n",
+                     *temp, (int) (temp - union_set + 1), Vectors::asso_values[*temp]);
+            fflush (stderr);
+          }
+        return; /* Good, doesn't affect previous hash values, we'll take it. */
+      }
+
+  /* Nothing worked: bring the stored hash values back in line with the
+     (restored) associated values.  */
+  for (List_Node *ptr = this->key_list.head; ptr != curr; ptr = ptr->next)
+    hash (ptr);
+
+  hash (curr);
+
+  if (option[DEBUG])
+    {
+      fprintf (stderr, "** collision not resolved after %d iterations, %d duplicates remain, continuing...\n",
+               !option[FAST] ? option.get_asso_max () : option.get_iterations () ? option.get_iterations () : this->key_list.keyword_list_length (),
+               fewest_collisions + this->key_list.total_duplicates);
+      fflush (stderr);
+    }
+}
+
+/* Does the hard stuff....
+ Initializes the Iteration Number array, and attempts to find a perfect
+ function that will hash all the key words without getting any
+ duplications. This is made much easier since we aren't attempting
+ to generate *minimum* functions, only perfect ones.
+ If we can't generate a perfect function in one pass *and* the user
+ hasn't enabled the DUP option, we'll inform the user to try the
+ randomization option, use -D, or choose alternative key positions.
+ The alternatives (e.g., back-tracking) are too time-consuming, i.e.,
+ exponential in the number of keys.
+
+ Returns 0 after the key list has been sorted and output, or 1 if a
+ duplicate hash value remains and the DUP option is off.
+
+ NOTE(review): in the non-LARGE_STACK_ARRAYS build the buffer handed
+ to char_search.init () is freed before returning while char_search
+ still holds the pointer; nothing in this file dereferences it
+ afterwards, but confirm no later caller does. */
+
+int
+Gen_Perf::generate (void)
+{
+#if LARGE_STACK_ARRAYS
+ STORAGE_TYPE buffer[max_hash_value + 1];
+#else
+ // Note: we don't use new, because that invokes a custom operator new.
+ STORAGE_TYPE *buffer
+ = (STORAGE_TYPE*) malloc (sizeof(STORAGE_TYPE) * (max_hash_value + 1));
+ if (buffer == NULL)
+ abort ();
+#endif
+
+ this->char_search.init (buffer, max_hash_value + 1);
+
+ List_Node *curr;
+
+ for (curr = this->key_list.head;
+ curr;
+ curr = curr->next)
+ {
+ hash (curr);
+
+ for (List_Node *ptr = this->key_list.head;
+ ptr != curr;
+ ptr = ptr->next)
+ if (ptr->hash_value == curr->hash_value)
+ {
+ change (ptr, curr); // Try to move CURR off the first earlier keyword it collides with.
+ break;
+ }
+ num_done++;
+ }
+
+ /* Make one final check, just to make sure nothing weird happened.... */
+
+ this->char_search.reset ();
+
+ for (curr = this->key_list.head;
+ curr;
+ curr = curr->next)
+ if (this->char_search.find (hash (curr)))
+ if (option[DUP]) /* Keep track of this number... */
+ this->key_list.total_duplicates++;
+ else /* Yow, big problems. we're outta here!  (This else pairs with the DUP test.) */
+ {
+ ACE_ERROR ((LM_ERROR, "\nInternal error, duplicate value %d:\n"
+ "try options -D or -r, or use new key positions.\n\n", hash (curr)));
+#if !LARGE_STACK_ARRAYS
+ free (buffer);
+#endif
+ return 1;
+ }
+
+ /* Sorts the key word list by hash value, and then outputs the list.
+ The generated hash table code is only output if the early stage of
+ processing turned out O.K. */
+
+ this->key_list.sort ();
+ this->key_list.output ();
+#if !LARGE_STACK_ARRAYS
+ free (buffer);
+#endif
+ return 0;
+}
+
+/* Destructor: with the DEBUG option on, dump to stderr the associated
+   value and occurrence count of every character whose
+   Vectors::occurrences entry is non-zero.  */
+
+Gen_Perf::~Gen_Perf (void)
+{
+  if (!option[DEBUG])
+    return;
+
+  fprintf (stderr, "\ndumping occurrence and associated values tables\n");
+
+  for (int ch = 0; ch < ALPHA_SIZE; ch++)
+    {
+      if (!Vectors::occurrences[ch])
+        continue;
+
+      fprintf (stderr, "Vectors::asso_values[%c] = %6d, Vectors::occurrences[%c] = %6d\n",
+               ch, Vectors::asso_values[ch], ch, Vectors::occurrences[ch]);
+    }
+
+  fprintf (stderr, "end table dumping\n");
+}
+
diff --git a/apps/gperf/src/Gen_Perf.h b/apps/gperf/src/Gen_Perf.h
new file mode 100644
index 00000000000..11817de4851
--- /dev/null
+++ b/apps/gperf/src/Gen_Perf.h
@@ -0,0 +1,65 @@
+/* -*- C++ -*- */
+// @(#)Gen_Perf.h 1.1 10/18/96
+
+/* This may look like C code, but it is really -*- C++ -*- */
+
+/* Provides high-level routines to manipulate the keyword list
+ structures and the code generation output.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#ifndef gen_perf_h
+#define gen_perf_h 1
+
+#include "Options.h"
+#include "Key_List.h"
+#include "Bool_Array.h"
+
+// Driver class: owns the keyword list and the collision-tracking
+// table used while the hash values of the keywords are computed.
+class Gen_Perf
+{
+public:
+ Gen_Perf (void);
+ ~Gen_Perf (void);
+ // Dumps the occurrence and associated-value tables to stderr when
+ // the DEBUG option is enabled.
+
+ int generate (void);
+ // Returns 0 once the key list has been sorted and output; returns 1
+ // when the final check finds a duplicate hash value and the DUP
+ // option is off.
+
+private:
+ void change (List_Node *prior, List_Node *curr);
+ // Called by generate () for two nodes whose hash values are equal.
+
+ int affects_prev (char c, List_Node *curr);
+ static int hash (List_Node *key_node);
+ // Returns the hash value of KEY_NODE.
+
+ static int compute_disjoint_union (char *set_1, char *set_2, char *set_3);
+ static void sort_set (char *union_set, int len);
+
+ int max_hash_value;
+ // Maximum possible hash value.
+
+ int fewest_collisions;
+ // Records fewest # of collisions for asso value.
+
+ int num_done;
+ // Number of keywords processed without a collision.
+
+ Bool_Array char_search;
+ // Table that keeps track of key collisions.
+
+ Key_List key_list;
+ // List of the keys we're trying to map into a perfect hash
+ // function.
+};
+#endif
diff --git a/apps/gperf/src/Hash_Table.cpp b/apps/gperf/src/Hash_Table.cpp
new file mode 100644
index 00000000000..dfb008514ce
--- /dev/null
+++ b/apps/gperf/src/Hash_Table.cpp
@@ -0,0 +1,84 @@
+/* Hash table for checking keyword links. Implemented using double hashing.
+// @(#)Hash_Table.cpp 1.1 10/18/96
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "ace/ACE.h"
+#include "Hash_Table.h"
+
+#define NIL(TYPE) (TYPE *)0
+
+// Builds a hash table on top of storage supplied by the caller.
+// TABLE_PTR must provide room for at least S slots; the first S slots
+// are cleared here.  The lookup code reduces hash values with
+// `& size - 1' instead of a mod instruction, so S is expected to be a
+// power of 2, and it had better be larger than the number of items
+// inserted, else there's trouble!  Leaving the allocation to the
+// caller compromises information hiding somewhat, but lets the caller
+// pick cheap storage (e.g. the stack) for the table.
+
+Hash_Table::Hash_Table (List_Node **table_ptr, int s)
+  : table (table_ptr),
+    size (s),
+    collisions (0)
+{
+  // Every slot starts out empty (null).
+  memset ((char *) table, 0, size * sizeof *table);
+}
+
+// Destructor.  When the DEBUG option is enabled, prints the table
+// statistics (slot count, byte count, collision count) followed by
+// every occupied slot, highest index first, to stderr.  The slot
+// storage itself is not released here.
+Hash_Table::~Hash_Table (void)
+{
+  if (option[DEBUG])
+    {
+      int field_width = option.get_max_keysig_size ();
+
+      // `size * sizeof *table' has type size_t, which does not match
+      // the %d conversion; convert it explicitly so that the argument
+      // agrees with the format string.
+      fprintf (stderr, "\ndumping the hash table\ntotal available table slots = %d, total bytes = %d, total collisions = %d\n"
+               "location, %*s, keyword\n", size, (int) (size * sizeof *table), collisions, field_width, "keysig");
+
+      for (int i = size - 1; i >= 0; i--)
+        if (table[i])
+          fprintf (stderr, "%8d, %*s, %s\n",
+                   i, field_width, table[i]->char_set, table[i]->key);
+
+      fprintf (stderr, "\nend dumping hash table\n\n");
+    }
+}
+
+// Looks ITEM up by its key set (and, unless IGNORE_LENGTH is non-zero,
+// by its length as well).  If a matching entry is already stored, that
+// entry is returned.  Otherwise ITEM is stored in the first free slot
+// reached and a null pointer is returned.  Uses double hashing; every
+// occupied, non-matching slot visited bumps the collision counter.
+
+List_Node *
+Hash_Table::operator() (List_Node *item, int ignore_length)
+{
+  unsigned hash_val = ACE::hash_pjw (item->char_set);
+  const int mask = size - 1;
+  int probe = hash_val & mask;
+  // Forcing the low bit keeps the step odd.
+  int increment = ((hash_val ^ item->length) | 1) & mask;
+
+  for (;;)
+    {
+      List_Node *occupant = table[probe];
+
+      if (!occupant)
+        {
+          // Free slot: ITEM was not present, so record it.
+          table[probe] = item;
+          return NIL (List_Node);
+        }
+
+      if (!strcmp (occupant->char_set, item->char_set)
+          && (ignore_length || occupant->length == item->length))
+        return occupant;
+
+      collisions++;
+      probe = (probe + increment) & mask;
+    }
+}
diff --git a/apps/gperf/src/Hash_Table.h b/apps/gperf/src/Hash_Table.h
new file mode 100644
index 00000000000..c7a77a1b37b
--- /dev/null
+++ b/apps/gperf/src/Hash_Table.h
@@ -0,0 +1,50 @@
+/* -*- C++ -*- */
+// @(#)Hash_Table.h 1.1 10/18/96
+
+/* This may look like C code, but it is really -*- C++ -*- */
+
+/* Hash table used to check for duplicate keyword entries.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#ifndef hash_table_h
+#define hash_table_h 1
+
+#include "Options.h"
+#include "List_Node.h"
+
+// Open-addressing table of List_Node pointers, probed with double
+// hashing, used to detect keywords with identical key sets.
+class Hash_Table
+{
+public:
+ Hash_Table (List_Node **t, int s);
+ // Uses T as the slot storage and clears its first S slots.
+
+ ~Hash_Table (void);
+ // Dumps the table to stderr when the DEBUG option is enabled.
+
+ List_Node *operator () (List_Node *item, int ignore_length);
+ // Returns the entry matching ITEM if one is already stored;
+ // otherwise stores ITEM and returns a null pointer.
+
+private:
+ List_Node **table;
+ // Vector of List_Node pointers, one per slot (storage is supplied
+ // through the constructor).
+
+ int size;
+ // Size of the vector.
+
+ int collisions;
+ // Find out how well our double hashing is working!
+};
+#endif
diff --git a/apps/gperf/src/Iterator.cpp b/apps/gperf/src/Iterator.cpp
new file mode 100644
index 00000000000..2e5d37f8f00
--- /dev/null
+++ b/apps/gperf/src/Iterator.cpp
@@ -0,0 +1,90 @@
+/* Provides an Iterator for keyword characters.
+// @(#)Iterator.cpp 1.1 10/18/96
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "Iterator.h"
+
+// Constructor: remembers the control string S together with the
+// inclusive bounds LO..HI and the three special return values
+// (WORD_END, BAD_VAL and KEY_END).  No parsing happens here.
+
+Iterator::Iterator (char *s,
+                    int lo,
+                    int hi,
+                    int word_end,
+                    int bad_val,
+                    int key_end)
+  : str (s),
+    end (key_end),
+    end_word (word_end),
+    error_value (bad_val),
+    hi_bound (hi),
+    lo_bound (lo)
+{
+}
+
+// Returns the ``next'' value described by the control string given to
+// the constructor:
+//
+//   * a number N yields N, provided lo_bound <= N <= hi_bound;
+//   * a range N-M yields N, N+1, ..., M on successive calls;
+//   * '$' yields end_word;
+//   * ',' separators are skipped;
+//   * anything else, or an out-of-bounds value, yields error_value;
+//   * once the string is exhausted, end is returned.
+//
+// The range bookkeeping lives in function-local statics, so it is
+// shared by every Iterator object.
+
+int
+Iterator::operator() (void)
+{
+  // Variables to record the Iterator's status when handling ranges,
+  // e.g., 3-12.
+
+  static int size;
+  static int curr_value;
+  static int upper_bound;
+
+  if (size)
+    {
+      // In the middle of a range: hand out the next value and leave
+      // range mode once the upper bound has been reached.
+      if (++curr_value >= upper_bound)
+        size = 0;
+      return curr_value;
+    }
+  else
+    {
+      while (*str)
+        switch (*str)
+          {
+          default: return error_value;
+          case ',': str++; break;
+          case '$': str++; return end_word;
+          case '0': case '1': case '2': case '3': case '4':
+          case '5': case '6': case '7': case '8': case '9':
+            // isdigit () is only defined for values representable as
+            // unsigned char (or EOF), hence the casts.
+            for (curr_value = 0; isdigit ((unsigned char) *str); str++)
+              curr_value = curr_value * 10 + *str - '0';
+
+            if (*str == '-')
+              {
+
+                for (size = 1, upper_bound = 0;
+                     isdigit ((unsigned char) *++str);
+                     upper_bound = upper_bound * 10 + *str - '0');
+
+                if (upper_bound <= curr_value || upper_bound > hi_bound)
+                  {
+                    // Leave range mode, otherwise the shared static
+                    // state would make the next call continue a range
+                    // that was just rejected.
+                    size = 0;
+                    return error_value;
+                  }
+              }
+
+            if (curr_value < lo_bound || curr_value > hi_bound)
+              {
+                size = 0;
+                return error_value;
+              }
+            return curr_value;
+          }
+
+      return end;
+    }
+}
diff --git a/apps/gperf/src/Iterator.h b/apps/gperf/src/Iterator.h
new file mode 100644
index 00000000000..d2c81039b3f
--- /dev/null
+++ b/apps/gperf/src/Iterator.h
@@ -0,0 +1,67 @@
+/* -*- C++ -*- */
+// @(#)Iterator.h 1.1 10/18/96
+
+/* This may look like C code, but it is really -*- C++ -*- */
+
+/* Provides an Iterator for keyword characters.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+
+/* Provides an Iterator that expands and decodes a control string
+ containing digits and ranges, returning an integer every time the
+ generator function is called. This is used to decode the user's
+ key position requests. For example: "-k 1,2,5-10,$" will return 1,
+ 2, 5, 6, 7, 8, 9, 10, and 0 (representing the abstract ``last
+ character of the key'') on successive calls to the member function
+ operator (). No errors are handled in these routines; they are
+ passed back to the calling routines via a user-supplied Error_Value. */
+
+#ifndef iterator_h
+#define iterator_h 1
+
+#include "Options.h"
+
+// Generator over the key positions named in a control string such as
+// "1,2,5-10,$".
+class Iterator
+{
+public:
+ Iterator (char *s, int lo, int hi, int word_end, int bad_val, int key_end);
+ // Stores the control string S, the inclusive bounds LO and HI, and
+ // the values to return for '$' (WORD_END), for erroneous input
+ // (BAD_VAL) and at the end of the string (KEY_END).
+
+ int operator () (void);
+ // Returns the next decoded value, or one of the special values
+ // passed to the constructor.
+
+private:
+ char *str;
+ // A pointer to the string provided by the user.
+
+ int end;
+ // Value returned after last key is processed.
+
+ int end_word;
+ // A value marking the abstract ``end of word'' (usually '$').
+
+ int error_value;
+ // Error value returned when input is syntactically erroneous.
+
+ int hi_bound;
+ // Greatest possible value, inclusive.
+
+ int lo_bound;
+ // Smallest possible value, inclusive.
+};
+
+#endif
diff --git a/apps/gperf/src/Key_List.cpp b/apps/gperf/src/Key_List.cpp
new file mode 100644
index 00000000000..3a944b4b28b
--- /dev/null
+++ b/apps/gperf/src/Key_List.cpp
@@ -0,0 +1,1345 @@
+/* Routines for building, ordering, and printing the keyword list.
+// @(#)Key_List.cpp 1.1 10/18/96
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "ace/Read_Buffer.h"
+#include "Hash_Table.h"
+#include "Vectors.h"
+#include "Key_List.h"
+
+/* Make the hash table 10 times larger than the number of keyword entries. */
+static const int TABLE_MULTIPLE = 10;
+
+/* Default type for generated code. */
+static char *const default_array_type = "char *";
+
+/* in_word_set return type, by default. */
+static char *const default_return_type = "char *";
+
+/* How wide the printed field width must be to contain the maximum hash value. */
+static int field_width = 0;
+
+static int determined[ALPHA_SIZE];
+
+/* Destructor.  With the DEBUG option enabled it prints the list
+   counters to stderr, calls dump (), and logs a closing message;
+   otherwise it does nothing. */
+
+Key_List::~Key_List (void)
+{
+  if (!option[DEBUG])
+    return;
+
+  fprintf (stderr, "\nDumping key list information:\ntotal non-static linked keywords = %d"
+           "\ntotal keywords = %d\ntotal duplicates = %d\nmaximum key length = %d\n",
+           list_len,
+           total_keys,
+           /* A non-zero duplicate count is reported as count + 1. */
+           total_duplicates ? total_duplicates + 1 : 0,
+           max_key_len);
+
+  dump ();
+
+  ACE_ERROR ((LM_ERROR, "End dumping list.\n\n"));
+}
+
+/* Gathers standard input into a buffer until a '%' immediately
+   followed by DELIMITER is read:
+
+   1. "%%" marks the beginning of the keyword list proper;
+   2. "%}" marks the end of the C source code to be copied verbatim
+      into the output file.
+
+   The keys are assumed to be separated from the optional preceding
+   struct declaration by such a pair.  The rest of the delimiter line
+   is discarded.
+
+   Returns:
+   - "" when the delimiter is the very first thing read;
+   - otherwise the buffered text with its last character dropped (or
+     its last two characters when DELIMITER is '%' and the text ends
+     in ';' followed by one more character);
+   - 0 when end of input is reached before any delimiter.
+
+   The buffer grows by doubling.  Superseded buffers are not released
+   here.  NOTE(review): a comment elsewhere in this file says plain
+   `new' invokes a custom operator new -- confirm whether the old
+   buffer may be deleted safely. */
+
+char *
+Key_List::get_special_input (char delimiter)
+{
+  int size = 80;
+  char *buf = new char[size];
+  int c, i;
+
+  for (i = 0; (c = getchar ()) != EOF; i++)
+    {
+      /* One pass stores at most two characters (a literal '%' plus the
+         character following it), so make room for both up front.  The
+         check must also cover the '%' path, which previously skipped
+         it and could write past the end of the buffer. */
+      if (i + 2 > size)
+        {
+          char *temp = new char[size *= 2];
+          int j;
+
+          for (j = 0; j < i; j++)
+            temp[j] = buf[j];
+
+          buf = temp;
+        }
+
+      if (c == '%')
+        {
+          if ((c = getchar ()) == delimiter)
+            {
+              /* Discard the remainder of the delimiter line.  Stop at
+                 end of input as well, otherwise a delimiter on an
+                 unterminated last line would loop forever. */
+              while ((c = getchar ()) != '\n' && c != EOF)
+                ;
+
+              if (i == 0)
+                return "";
+              else
+                {
+                  /* Only look at buf[i - 2] when it exists. */
+                  int last = i - 1;
+
+                  if (delimiter == '%' && i >= 2 && buf[i - 2] == ';')
+                    last = i - 2;
+
+                  buf[last] = '\0';
+                  return buf;
+                }
+            }
+          else
+            buf[i++] = '%';   /* A lone '%' is ordinary text. */
+        }
+
+      buf[i] = c;
+    }
+
+  return 0; /* Problem here: input ended without a delimiter. */
+}
+
+/* Collects the C text that is to be copied verbatim into the
+   generated output.  If the input starts with "%{", everything up to
+   the matching "%}" is returned via get_special_input ().  If the
+   first character is not '%' it is pushed back and "" is returned.  A
+   '%' followed by anything other than '{' is reported as an internal
+   error. */
+
+char *
+Key_List::save_include_src (void)
+{
+  int first = getchar ();
+
+  if (first != '%')
+    {
+      /* No verbatim section: leave the input untouched. */
+      ungetc (first, stdin);
+      return "";
+    }
+
+  int second = getchar ();
+
+  if (second == '{')
+    return get_special_input ('}');
+
+  ACE_ERROR ((LM_ERROR, "internal error, %c != '{' on line %d in file %s%a", second, __LINE__, __FILE__, 1));
+  return "";
+}
+
+/* Reads the text that precedes the "%%" line (the user's struct
+   declaration, if any) and returns it.  The result is whatever
+   get_special_input ('%') yields, so it can be "" or 0. */
+
+char *
+Key_List::get_array_type (void)
+{
+  char *declaration = get_special_input ('%');
+
+  return declaration;
+}
+
+/* strcspn - returns the length of the initial segment of S that
+   consists entirely of characters not found in REJECT.  The
+   terminating NUL of REJECT is never treated as a member of the
+   set. */
+
+inline int
+Key_List::strcspn (const char *s, const char *reject)
+{
+  const char *cursor = s;
+
+  while (*cursor)
+    {
+      const char *candidate = reject;
+
+      /* Walk REJECT until it is exhausted or the character matches. */
+      while (*candidate && *candidate != *cursor)
+        candidate++;
+
+      if (*candidate)
+        break;
+
+      cursor++;
+    }
+
+  return int (cursor - s);
+}
+
+/* Sets up return_type, struct_tag and array_type from the user's
+   options.  With the TYPE option the struct declaration is read from
+   the input; everything before the first '{' or newline becomes the
+   struct tag, and with POINTER the return type is that tag followed
+   by '*'.  Without TYPE, POINTER selects the default type. */
+
+void
+Key_List::set_output_types (void)
+{
+  if (!option[TYPE])
+    {
+      if (option[POINTER]) /* Return a char *. */
+        return_type = default_array_type;
+      return;
+    }
+
+  array_type = get_array_type ();
+
+  if (!array_type)
+    return; /* Something's wrong, but we'll catch it later on.... */
+
+  /* Yow, we've got a user-defined type... */
+  int tag_len = strcspn (array_type, "{\n\0");
+
+  if (option[POINTER]) /* And it must return a pointer... */
+    {
+      return_type = new char[tag_len + 2];
+      strncpy (return_type, array_type, tag_len);
+      return_type[tag_len] = '*';
+      return_type[tag_len + 1] = '\0';
+    }
+
+  struct_tag = new char[tag_len + 1];
+  strncpy (struct_tag, array_type, tag_len);
+  struct_tag[tag_len] = '\0';
+}
+
+/* Reads in all keys from standard input and creates a linked list pointed
+ to by Head. This list is then quickly checked for ``links,'' i.e.,
+ unhashable elements possessing identical key sets and lengths.
+ Linked keywords are unhooked from the main list and chained onto the
+ first keyword with the same key set through its `link' field. Also
+ records the minimum and maximum keyword lengths. */
+
+void
+Key_List::read_keys (void)
+{
+ /* Consume the optional "%{ ... %}" section and the struct declaration
+ before the keywords themselves are read. */
+ include_src = save_include_src ();
+ set_output_types ();
+
+ ACE_Read_Buffer input (stdin);
+
+ char *ptr = input.read ('\n');
+
+ if (ptr == 0)
+ // Oops, problem with the input file.
+ ACE_ERROR ((LM_ERROR, "No words in input file, did you forget to prepend %s"
+ " or use -t accidentally?\n%a", "%%", 1));
+
+ /* Read in all the keywords from the input file. */
+ else
+ {
+ const char *delimiter = option.get_delimiter ();
+ List_Node *temp, *trail = 0;
+
+ /* The keyword is the part of the line in front of the first
+ delimiter character. */
+ head = new List_Node (ptr, strcspn (ptr, delimiter));
+
+ /* Keep appending nodes until input runs out or a "%%" line is seen. */
+ for (temp = head;
+ (ptr = input.read ('\n')) && strcmp (ptr, "%%");
+ temp = temp->next)
+ {
+ temp->next = new List_Node (ptr, strcspn (ptr, delimiter));
+ total_keys++;
+ }
+
+ /* See if any additional source code is included at end of this file. */
+ if (ptr)
+ additional_code = 1;
+
+ /* Hash table this number of times larger than keyword number. */
+ int table_size = (list_len = total_keys) * TABLE_MULTIPLE;
+
+ /* NOTE(review): Hash_Table masks with `size - 1', so it needs a
+ power-of-2 size, yet `table_size' itself is passed to it below.
+ This only works if ACE_POW updates its argument in place --
+ confirm against the ACE_POW definition. */
+#if LARGE_STACK_ARRAYS
+ /* By allocating the memory here we save on dynamic allocation overhead.
+ Table must be a power of 2 for the hash function scheme to work. */
+ List_Node *table[ACE_POW (table_size)];
+#else
+ // Note: we don't use new, because that invokes a custom operator new.
+ int malloc_size = ACE_POW (table_size) * sizeof(List_Node*);
+ if (malloc_size == 0) malloc_size = 1;
+ List_Node **table = (List_Node**)malloc(malloc_size);
+ if (table == NULL)
+ abort ();
+#endif
+
+ /* Make large hash table for efficiency. */
+ Hash_Table found_link (table, table_size);
+
+ /* Test whether there are any links and also set the maximum length of
+ an identifier in the keyword list. */
+
+ for (temp = head; temp; temp = temp->next)
+ {
+ /* Non-null when an earlier keyword has the same key set (and the
+ same length, unless NOLENGTH is set). Note that this `ptr'
+ shadows the outer `char *ptr'. */
+ List_Node *ptr = found_link (temp, option[NOLENGTH]);
+
+ /* Check for links. We deal with these by building an equivalence class
+ of all duplicate values (i.e., links) so that only 1 keyword is
+ representative of the entire collection. This *greatly* simplifies
+ processing during later stages of the program. */
+
+ if (ptr)
+ {
+ total_duplicates++;
+ list_len--;
+ /* Unhook TEMP from the main list and push it onto PTR's chain
+ of links. `trail' has been set by this point: the first
+ node is looked up in a freshly cleared table, so it takes
+ the else branch below. */
+ trail->next = temp->next;
+ temp->link = ptr->link;
+ ptr->link = temp;
+
+ /* Complain if user hasn't enabled the duplicate option. */
+ if (!option[DUP] || option[DEBUG])
+ ACE_ERROR ((LM_ERROR, "Key link: \"%s\" = \"%s\", with key set \"%s\".\n",
+ temp->key, ptr->key, temp->char_set));
+ }
+ else
+ trail = temp;
+
+ /* Update minimum and maximum keyword length, if needed. */
+ if (max_key_len < temp->length)
+ max_key_len = temp->length;
+ if (min_key_len > temp->length)
+ min_key_len = temp->length;
+ }
+
+#if !LARGE_STACK_ARRAYS
+ free (table);
+#endif
+
+ /* Exit program if links exists and option[DUP] not set, since we can't continue */
+ if (total_duplicates)
+ ACE_ERROR ((LM_ERROR, option[DUP]
+ ? "%d input keys have identical hash values, examine output carefully...\n"
+ : "%d input keys have identical hash values,\ntry different key positions or use option -D.\n%a", total_duplicates, 1));
+ if (option[ALLCHARS])
+ option.set_keysig_size (max_key_len);
+ }
+}
+
+/* Merges two sorted lists together to form one sorted list.  The
+   ordering criterion is selected by the occurrence_sort and hash_sort
+   flags: by frequency of occurrence of the elements in the key set
+   (larger first), or by hash value (smaller first).  This is a
+   kludge, but permits sharing almost identical code without the
+   overhead of a comparison function call.
+
+   Whenever the head of LIST2 is taken, the two lists swap roles for
+   the remainder of the merge. */
+
+List_Node *
+Key_List::merge (List_Node *list1, List_Node *list2)
+{
+  List_Node *merged = 0;
+  List_Node **tail = &merged;   /* Where the next node gets attached. */
+
+  while (list1 && list2)
+    {
+      int take_second = (occurrence_sort && list1->occurrence < list2->occurrence)
+                        || (hash_sort && list1->hash_value > list2->hash_value);
+
+      if (take_second)
+        {
+          List_Node *rest = list2->next;
+
+          *tail = list2;
+          tail = &list2->next;
+
+          /* Continue with (rest of list2, list1), in that order. */
+          list2 = list1;
+          list1 = rest;
+        }
+      else
+        {
+          *tail = list1;
+          tail = &list1->next;
+          list1 = list1->next;
+        }
+    }
+
+  /* One list is exhausted; the other is appended as is. */
+  *tail = list1 ? list1 : list2;
+
+  return merged;
+}
+
+/* Sorts the list starting at A_HEAD with a recursive merge sort and
+   returns the new head.  The ordering is whatever merge () currently
+   implements. */
+
+List_Node *
+Key_List::merge_sort (List_Node *a_head)
+{
+  /* Zero or one node: nothing to sort. */
+  if (!a_head || !a_head->next)
+    return a_head;
+
+  /* FAST moves two nodes for every node SLOW moves, so SLOW ends up on
+     the last node of the front half. */
+  List_Node *slow = a_head;
+  List_Node *fast = a_head->next->next;
+
+  while (fast)
+    {
+      fast = fast->next;
+      slow = slow->next;
+
+      if (fast)
+        fast = fast->next;
+    }
+
+  /* Cut the list in two after SLOW. */
+  List_Node *back_half = slow->next;
+  slow->next = 0;
+
+  return merge (merge_sort (a_head), merge_sort (back_half));
+}
+
+/* Returns the sum of Vectors::occurrences[] over every character in
+   PTR's key set. */
+
+inline int
+Key_List::get_occurrence (List_Node *ptr)
+{
+  int total = 0;
+  const char *cursor = ptr->char_set;
+
+  while (*cursor)
+    {
+      total += Vectors::occurrences[*cursor];
+      cursor++;
+    }
+
+  return total;
+}
+
+/* Marks every character in PTR's key set as determined. */
+
+inline void
+Key_List::set_determined (List_Node *ptr)
+{
+  const char *cursor = ptr->char_set;
+
+  while (*cursor)
+    {
+      determined[*cursor] = 1;
+      cursor++;
+    }
+}
+
+/* Returns non-zero if every character in PTR's key set is already
+   marked as determined (an empty key set counts as determined), and 0
+   as soon as one character is not. */
+
+inline int
+Key_List::already_determined (List_Node *ptr)
+{
+  int state = 1;
+
+  for (const char *cursor = ptr->char_set; *cursor; cursor++)
+    {
+      state = determined[*cursor];
+
+      if (!state)
+        return 0;
+    }
+
+  return state;
+}
+
+/* Reorders the table by first sorting the list so that frequently occurring
+ keys appear first, and then the list is reordered so that keys whose values
+ are already determined will be placed towards the front of the list. This
+ helps prune the search time by handling inevitable collisions early in the
+ search process. See Cichelli's paper from Jan 1980 JACM for details.... */
+
+void
+Key_List::reorder (void)
+{
+ List_Node *ptr;
+
+ /* Cache each keyword's occurrence total; merge () compares these. */
+ for (ptr = head; ptr; ptr = ptr->next)
+ ptr->occurrence = get_occurrence (ptr);
+
+ /* Select the occurrence ordering: hash_sort = 0, occurrence_sort = 1. */
+ occurrence_sort = !(hash_sort = 0); /* Pretty gross, eh?! */
+
+ /* Note: the loop condition dereferences the sorted head, so the list
+ must not be empty here. */
+ for (ptr = head = merge_sort (head); ptr->next; ptr = ptr->next)
+ {
+ set_determined (ptr);
+
+ /* Nothing to do if the following keyword is fully determined too. */
+ if (already_determined (ptr->next))
+ continue;
+ else
+ {
+ /* Scan the rest of the list. Every keyword that is already
+ determined is unlinked and re-inserted directly after PTR,
+ and PTR then advances onto it. */
+ List_Node *trail_ptr = ptr->next;
+ List_Node *run_ptr = trail_ptr->next;
+
+ for (; run_ptr; run_ptr = trail_ptr->next)
+ {
+
+ if (already_determined (run_ptr))
+ {
+ trail_ptr->next = run_ptr->next;
+ run_ptr->next = ptr->next;
+ ptr = ptr->next = run_ptr;
+ }
+ else
+ trail_ptr = run_ptr;
+ }
+ }
+ }
+}
+
+/* Records and prints the minimum and maximum hash values together
+   with the other summary constants.  The list is expected to be
+   sorted by hash value already, so the minimum is taken from the
+   first node and the maximum from the last one.  Without the ENUM
+   option the constants are printed as #defines; with ENUM they are
+   printed as an enum, but only if GLOBAL is set as well. */
+
+void
+Key_List::output_min_max ()
+{
+  List_Node *last = head;
+
+  while (last->next)
+    last = last->next;
+
+  min_hash_value = head->hash_value;
+  max_hash_value = last->hash_value;
+
+  if (option[ENUM])
+    {
+      if (option[GLOBAL])
+        printf ("enum\n{\n"
+                " TOTAL_KEYWORDS = %d,\n"
+                " MIN_WORD_LENGTH = %d,\n"
+                " MAX_WORD_LENGTH = %d,\n"
+                " MIN_HASH_VALUE = %d,\n"
+                " MAX_HASH_VALUE = %d,\n"
+                " HASH_VALUE_RANGE = %d,\n"
+                " DUPLICATES = %d\n};\n\n",
+                total_keys, min_key_len, max_key_len, min_hash_value,
+                max_hash_value, max_hash_value - min_hash_value + 1,
+                total_duplicates ? total_duplicates + 1 : 0);
+    }
+  else
+    printf ("\n#define TOTAL_KEYWORDS %d\n#define MIN_WORD_LENGTH %d"
+            "\n#define MAX_WORD_LENGTH %d\n#define MIN_HASH_VALUE %d"
+            "\n#define MAX_HASH_VALUE %d\n#define HASH_VALUE_RANGE %d"
+            "\n#define DUPLICATES %d\n\n",
+            total_keys, min_key_len, max_key_len, min_hash_value,
+            max_hash_value, max_hash_value - min_hash_value + 1,
+            total_duplicates ? total_duplicates + 1 : 0);
+}
+
+/* Generates the output using a C switch. This trades increased
+ search time for decreased table space (potentially *much* less
+ space for sparse tables). If the user has specified their own
+ struct in the keyword file *and* they enable the POINTER option we
+ have extra work to do. The solution here is to maintain a local
+ static array of user defined struct's, as with the
+ Output_Lookup_Function. Then we use the switch statements to
+ perform either a strcmp or strncmp, returning 0 if the str fails to
+ match, and otherwise returning a pointer to appropriate index
+ location in the local static array.
+
+ The comparison emitted into the generated code depends on two
+ options: COMP selects the length-limited calls (strncmp or
+ strncasecmp, three arguments), otherwise the two-argument calls
+ (strcmp or strcasecmp) are used; STRCASECMP selects the
+ case-insensitive variants. */
+
+void
+Key_List::output_switch (void)
+{
+ char *comp_buffer;
+ List_Node *curr = head;
+ int pointer_and_type_enabled = option[POINTER] && option[TYPE];
+ int total_switches = option.get_total_switches ();
+ int switch_size = keyword_list_length () / total_switches;
+
+ if (pointer_and_type_enabled)
+ {
+ /* The buffer is sized from the longest of the templates below plus
+ room for the key name, which is substituted twice. */
+#if defined (__GNUG__)
+ comp_buffer = (char *) alloca (strlen ("charmap[*str] == *resword->%s && !strncasecmp (str + 1, resword->%s + 1, len - 1)")
+ + 2 * strlen (option.get_key_name ()) + 1);
+#else
+ comp_buffer = new char [strlen ("charmap[*str] == *resword->%s && !strncasecmp (str + 1, resword->%s + 1, len - 1)")
+ + 2 * strlen (option.get_key_name ()) + 1];
+#endif
+ if (option[COMP])
+ sprintf (comp_buffer, "%s == *resword->%s && !%s (str + 1, resword->%s + 1, len - 1)",
+ option[STRCASECMP] ? "charmap[*str]" : "*str", option.get_key_name (),
+ option[STRCASECMP] ? "strncasecmp" : "strncmp", option.get_key_name ());
+ else
+ sprintf (comp_buffer, "%s == *resword->%s && !%s (str + 1, resword->%s + 1)",
+ option[STRCASECMP] ? "charmap[*str]" : "*str", option.get_key_name (),
+ option[STRCASECMP] ? "strcasecmp" : "strcmp", option.get_key_name ());
+ }
+ else
+ {
+ if (option[COMP])
+ comp_buffer = option[STRCASECMP]
+ ? "charmap[*str] == *resword && !strncasecmp (str + 1, resword + 1, len - 1)"
+ : "*str == *resword && !strncmp (str + 1, resword + 1, len - 1)";
+ else
+ /* strcmp and strcasecmp take exactly two arguments; passing a
+ length as well would make the generated code fail to compile. */
+ comp_buffer = option[STRCASECMP]
+ ? "charmap[*str] == *resword && !strcasecmp (str + 1, resword + 1)"
+ : "*str == *resword && !strcmp (str + 1, resword + 1)";
+ }
+ if (!option[OPTIMIZE])
+ printf (" if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)\n {\n");
+ printf (" register int key = %s (str, len);\n\n", option.get_hash_name ());
+ if (!option[OPTIMIZE])
+ printf (" if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)\n");
+
+ printf (" {\n");
+
+ /* Properly deal with users who request multiple switch statements. */
+
+ while (curr)
+ {
+ List_Node *temp = curr;
+ int lowest_case_value = curr->hash_value;
+ int number_of_cases = 0;
+
+ /* Figure out a good cut point to end this switch. Keywords that
+ share a hash value are never split across two switches. */
+
+ for (; temp && ++number_of_cases < switch_size; temp = temp->next)
+ if (temp->next && temp->hash_value == temp->next->hash_value)
+ while (temp->next && temp->hash_value == temp->next->hash_value)
+ temp = temp->next;
+
+ if (temp && total_switches != 1)
+ printf (" if (key <= %d)\n {\n", temp->hash_value);
+ else
+ printf (" {\n");
+
+ /* Output each keyword as part of a switch statement indexed by hash value. */
+
+ if (option[POINTER] || option[DUP])
+ {
+ int i = 0;
+
+ printf (" %s%s *resword; %s\n\n",
+ option[CONST] ? "const " : "",
+ pointer_and_type_enabled ? struct_tag : "char",
+ option[LENTABLE] && !option[DUP] ? "int key_len;" : "");
+ if (total_switches == 1)
+ {
+ printf (" switch (key)\n {\n");
+ lowest_case_value = 0;
+ }
+ else
+ printf (" switch (key - %d)\n {\n", lowest_case_value);
+
+ for (temp = curr; temp && ++i <= number_of_cases; temp = temp->next)
+ {
+ printf (" case %*d:", field_width, temp->hash_value - lowest_case_value);
+ if (option[DEBUG])
+ printf (" /* hash value = %4d, keyword = \"%s\" */", temp->hash_value, temp->key);
+ putchar ('\n');
+
+ /* Handle `natural links,' i.e., those that occur statically. */
+
+ if (temp->link)
+ {
+ List_Node *links;
+
+ for (links = temp; links; links = links->link)
+ {
+ if (pointer_and_type_enabled)
+ printf (" resword = &wordlist[%d];\n", links->index);
+ else
+ printf (" resword = \"%s\";\n", links->key);
+ printf (" if (%s) return resword;\n", comp_buffer);
+ }
+ }
+ /* Handle unresolved duplicate hash values. These are guaranteed
+ to be adjacent since we sorted the keyword list by increasing
+ hash values. */
+ if (temp->next && temp->hash_value == temp->next->hash_value)
+ {
+
+ for ( ; temp->next && temp->hash_value == temp->next->hash_value;
+ temp = temp->next)
+ {
+ if (pointer_and_type_enabled)
+ printf (" resword = &wordlist[%d];\n", temp->index);
+ else
+ printf (" resword = \"%s\";\n", temp->key);
+ printf (" if (%s) return resword;\n", comp_buffer);
+ }
+ /* The last keyword of the run decides the result of the case. */
+ if (pointer_and_type_enabled)
+ printf (" resword = &wordlist[%d];\n", temp->index);
+ else
+ printf (" resword = \"%s\";\n", temp->key);
+ printf (" return %s ? resword : 0;\n", comp_buffer);
+ }
+ else if (temp->link)
+ printf (" return 0;\n");
+ else
+ {
+ if (pointer_and_type_enabled)
+ printf (" resword = &wordlist[%d];", temp->index);
+ else
+ printf (" resword = \"%s\";", temp->key);
+ if (option[LENTABLE] && !option[DUP])
+ printf (" key_len = %d;", temp->length);
+ printf (" break;\n");
+ }
+ }
+ printf (" default: return 0;\n }\n");
+ if (option[OPTIMIZE])
+ printf (" return resword;\n");
+ else
+ {
+ printf (option[LENTABLE] && !option[DUP]
+ ? " if (len == key_len && %s)\n return resword;\n"
+ : " if (%s)\n return resword;\n", comp_buffer);
+ printf (" return 0;\n");
+ }
+ printf (" }\n");
+ curr = temp;
+ }
+ else /* Nothing special required here. */
+ {
+ int i = 0;
+ printf (" char *s;\n\n switch (key - %d)\n {\n",
+ lowest_case_value);
+
+ for (temp = curr; temp && ++i <= number_of_cases; temp = temp->next)
+ if (option[LENTABLE])
+ printf (" case %*d: if (len == %d) s = \"%s\"; else return 0; break;\n",
+ field_width, temp->hash_value - lowest_case_value,
+ temp->length, temp->key);
+ else
+ printf (" case %*d: s = \"%s\"; break;\n",
+ field_width, temp->hash_value - lowest_case_value, temp->key);
+
+ printf (" default: return 0;\n }\n ");
+ /* COMP asks for the length-limited comparison, so both variants
+ here take a length; the other pair must be called with two
+ arguments only. */
+ if (option[COMP])
+ printf ("return %s == *s && !%s;\n }\n",
+ option[STRCASECMP] ? "charmap[*str]" : "*str",
+ option[STRCASECMP] ? "strncasecmp (s + 1, str + 1, len - 1)" : "strncmp (s + 1, str + 1, len - 1)");
+ else
+ printf ("return %s == *s && !%s;\n }\n",
+ option[STRCASECMP] ? "charmap[*str]" : "*str",
+ option[STRCASECMP] ? "strcasecmp (s + 1, str + 1)" : "strcmp (s + 1, str + 1)");
+ curr = temp;
+ }
+ }
+ printf (" }\n %s\n}\n", option[OPTIMIZE] ? "" : "}\n return 0;");
+}
+
+/* Prints the `lengthtable' array of keyword lengths used by the
+   comparison code of the generated lookup function.  The entry at
+   index H holds the length of the keyword whose hash value is H, and
+   the gaps between hash values are filled with 0.  Nothing is printed
+   when either the DUP or the SWITCH option is set. */
+
+void
+Key_List::output_keylength_table (void)
+{
+  const int max_column = 15;
+  const char *indent = option[GLOBAL] ? "" : " ";
+
+  if (option[DUP] || option[SWITCH])
+    return;
+
+  /* Pick the narrowest unsigned type able to hold the longest key. */
+  const char *element_type = "long";
+
+  if (max_key_len <= UCHAR_MAX)
+    element_type = "char";
+  else if (max_key_len <= USHRT_MAX)
+    element_type = "short";
+
+  printf ("\n%sstatic %sunsigned %s lengthtable[] =\n%s%s{\n ",
+          indent, option[CONST] ? "const " : "",
+          element_type,
+          indent, indent);
+
+  int slot = 0;
+  int column = 0;
+
+  for (List_Node *node = head; node; node = node->next, slot++)
+    {
+      /* Pad with zeros up to this keyword's hash value. */
+      while (slot < node->hash_value)
+        {
+          printf ("%3d,%s", 0, ++column % (max_column - 1) ? "" : "\n ");
+          slot++;
+        }
+
+      printf ("%3d,%s", node->length, ++column % (max_column - 1 ) ? "" : "\n ");
+    }
+
+  printf ("\n%s%s};\n", indent, indent);
+}
+/* Prints out the array containing the key words for the Gen_Perf
+   hash function.
+
+   Emits the `wordlist' array.  Positions in front of a keyword's hash
+   value are filled with empty-string entries, unless SWITCH is set or
+   DUP is set while duplicates exist.  As a side effect every node
+   (including nodes chained through LINK) has its INDEX member set to
+   the array position at which it was printed.  HEAD must be non-null. */
+
+void
+Key_List::output_keyword_table (void)
+{
+  /* Entries are wrapped in braces only when the first keyword carries
+     additional fields (a non-empty REST string). */
+  char *l_brace = *head->rest ? "{" : "";
+  char *r_brace = *head->rest ? "}," : "";
+  char *indent = option[GLOBAL] ? "" : " ";
+  int index = 0;
+  /* Declared outside the loop below because it is read again after the
+     loop ends; a for-init declaration would be out of scope there under
+     ISO C++ scoping rules. */
+  int lead_column = 1;
+  List_Node *temp;
+
+  printf ("%sstatic %s%swordlist[] =\n%s%s{\n",
+          indent, option[CONST] ? "const " : "", struct_tag, indent, indent);
+
+  /* Skip over leading blank entries if there are no duplicates. */
+
+  if (0 < head->hash_value)
+    printf (" ");
+  for (; index < head->hash_value; index++, lead_column++)
+    printf ("%s\"\",%s %s", l_brace, r_brace, lead_column % 9 ? "" : "\n ");
+  if (0 < head->hash_value && lead_column % 10)
+    printf ("\n");
+
+  /* Generate an array of reserved words at appropriate locations. */
+
+  for (temp = head ; temp; temp = temp->next, index++)
+    {
+      temp->index = index;
+
+      if (!option[SWITCH] && (total_duplicates == 0 || !option[DUP]) && index < temp->hash_value)
+        {
+          int column;
+
+          printf (" ");
+
+          /* Fill the gap up to this keyword's hash value with blanks. */
+          for (column = 1; index < temp->hash_value; index++, column++)
+            printf ("%s\"\",%s %s", l_brace, r_brace, column % 9 ? "" : "\n ");
+
+          if (column % 10)
+            printf ("\n");
+          else
+            {
+              /* The filler row is still open: put the keyword on it. */
+              printf ("%s\"%s\", %s%s", l_brace, temp->key, temp->rest, r_brace);
+              if (option[DEBUG])
+                printf (" /* hash value = %d, index = %d */", temp->hash_value, temp->index);
+              putchar ('\n');
+              continue;
+            }
+        }
+
+      printf (" %s\"%s\", %s%s", l_brace, temp->key, temp->rest, r_brace);
+      if (option[DEBUG])
+        printf (" /* hash value = %d, index = %d */", temp->hash_value, temp->index);
+      putchar ('\n');
+
+      /* Deal with links specially. */
+      if (temp->link)
+        for (List_Node *links = temp->link; links; links = links->link)
+          {
+            links->index = ++index;
+            printf (" %s\"%s\", %s%s", l_brace, links->key, links->rest, r_brace);
+            if (option[DEBUG])
+              printf (" /* hash value = %d, index = %d */", links->hash_value, links->index);
+            putchar ('\n');
+          }
+    }
+  printf ("%s%s};\n\n", indent, indent);
+}
+
+/* Generates C code for the hash function that returns the
+   proper encoding for each key word.
+
+   The emitted function consists of the `asso_values' table (one entry
+   per character code below ALPHA_SIZE; characters that never occur in
+   a key position receive MAX_HASH_VALUE + 1) followed by code that
+   sums the table entries for the selected key positions, optionally
+   adding the key length.  Also sets the non-local FIELD_WIDTH used
+   for column alignment. */
+
+void
+Key_List::output_hash_function (void)
+{
+  const int max_column = 10;
+  int count = max_hash_value;
+
+  /* Calculate maximum number of digits required for MAX_HASH_VALUE. */
+
+  for (field_width = 2; (count /= 10) > 0; field_width++)
+    ;
+
+  if (option[GNU])
+    printf ("#ifdef __GNUC__\ninline\n#endif\n");
+
+  if (option[C])
+    printf ("static ");
+  printf ("unsigned int\n");
+  if (option[CPLUSPLUS])
+    printf ("%s::", option.get_class_name ());
+
+  /* The table stores MAX_HASH_VALUE + 1 for unused characters, so the
+     element type must be wide enough for that value, not merely for
+     MAX_HASH_VALUE itself (hence `<' rather than `<='). */
+  printf (option[ANSI]
+          ? "%s (register const char *str, register int len)\n{\n static %sunsigned %s asso_values[] =\n {"
+          : "%s (str, len)\n register char *str;\n register int unsigned len;\n{\n static %sunsigned %s asso_values[] =\n {",
+          option.get_hash_name (), option[CONST] ? "const " : "",
+          max_hash_value < UCHAR_MAX ? "char" : (max_hash_value < USHRT_MAX ? "short" : "int"));
+
+  for (count = 0; count < ALPHA_SIZE; ++count)
+    {
+      if (!(count % max_column))
+        printf ("\n ");
+
+      printf ("%*d,",
+              field_width,
+              Vectors::occurrences[count] ? Vectors::asso_values[count] : max_hash_value + 1);
+    }
+
+  /* Optimize special case of ``-k 1,$'' */
+  if (option[DEFAULTCHARS])
+    {
+      if (option[STRCASECMP])
+        printf ("\n };\n return %sasso_values[charmap[str[len - 1]]] + asso_values[charmap[str[0]]];\n}\n\n",
+                option[NOLENGTH] ? "" : "len + ");
+      else
+        printf ("\n };\n return %sasso_values[str[len - 1]] + asso_values[str[0]];\n}\n\n",
+                option[NOLENGTH] ? "" : "len + ");
+    }
+  else
+    {
+      int key_pos;
+
+      option.reset ();
+
+      /* Get first (also highest) key position. */
+      key_pos = option.get ();
+
+      /* We can perform additional optimizations here. */
+      if (!option[ALLCHARS] && key_pos <= min_key_len)
+        {
+          /* Every selected position exists in every key, so a single
+             return expression suffices. */
+          printf ("\n };\n return %s", option[NOLENGTH] ? "" : "len + ");
+
+          for (; key_pos != WORD_END; )
+            {
+              printf (option[STRCASECMP] ? "asso_values[charmap[str[%d]]]" : "asso_values[str[%d]]", key_pos - 1);
+              if ((key_pos = option.get ()) != EOS)
+                printf (" + ");
+              else
+                break;
+            }
+
+          printf ("%s;\n}\n\n", key_pos == WORD_END
+                  ? (option[STRCASECMP] ? "asso_values[charmap[str[len - 1]]]" : "asso_values[str[len - 1]]")
+                  : "");
+        }
+
+      /* We've got to use the correct, but brute force, technique. */
+      else
+        {
+          printf ("\n };\n register int hval = %s;\n\n switch (%s)\n {\n default:\n",
+                  option[NOLENGTH] ? "0" : "len", option[NOLENGTH] ? "len" : "hval");
+
+          /* User wants *all* characters considered in hash. */
+          if (option[ALLCHARS])
+            {
+              int i;
+
+              /* Break these options up for speed (gee, is this misplaced efficiency or what?!) */
+              if (option[STRCASECMP])
+
+                for (i = max_key_len; i > 0; i--)
+                  printf (" case %d:\n hval += asso_values[charmap[str[%d]]];\n", i, i - 1);
+
+              else
+
+                for (i = max_key_len; i > 0; i--)
+                  printf (" case %d:\n hval += asso_values[str[%d]];\n", i, i - 1);
+
+              printf (" }\n return hval;\n}\n\n");
+            }
+          else /* do the hard part... */
+            {
+              count = key_pos + 1;
+
+              do
+                {
+
+                  /* Emit empty fall-through labels for the lengths that
+                     lie between two consecutive key positions. */
+                  while (--count > key_pos)
+                    printf (" case %d:\n", count);
+
+                  printf (option[STRCASECMP]
+                          ? " case %d:\n hval += asso_values[charmap[str[%d]]];\n"
+                          : " case %d:\n hval += asso_values[str[%d]];\n",
+                          key_pos, key_pos - 1);
+                }
+              while ((key_pos = option.get ()) != EOS && key_pos != WORD_END);
+
+              printf (" }\n return hval%s;\n}\n\n",
+                      key_pos == WORD_END
+                      ? (option[STRCASECMP] ? " + asso_values[charmap[str[len - 1]]]" : " + asso_values[str[len - 1]]")
+                      : "");
+            }
+        }
+    }
+}
+
+/* Generates the large, sparse table that maps hash values into
+   the smaller, contiguous range of the keyword table.
+
+   Nothing is emitted unless TOTAL_DUPLICATES > 0.  Each `lookup' slot
+   holds -1 when unused, or the wordlist index of the keyword with that
+   hash value.  For a hash value shared by several keywords, two
+   adjacent unused slots receive (-index, -count) and the hash value's
+   own slot receives MAX_HASH_VALUE plus the distance to that pair,
+   negated when the pair lies to the left.  The INDEX members read here
+   are the ones assigned by output_keyword_table. */
+
+void
+Key_List::output_lookup_array (void)
+{
+  if (total_duplicates > 0)
+    {
+      const int DEFAULT_VALUE = -1;
+
+      struct duplicate_entry
+      {
+        int hash_value; /* Hash value for this particular duplicate set. */
+        int index; /* Index into the main keyword storage array. */
+        int count; /* Number of consecutive duplicates at this index. */
+      };
+#if LARGE_STACK_ARRAYS
+      duplicate_entry duplicates[total_duplicates];
+      int lookup_array[max_hash_value + 1];
+#else
+      // Note: we don't use new, because that invokes a custom operator new.
+      duplicate_entry *duplicates = (duplicate_entry*)
+        malloc (total_duplicates * sizeof(duplicate_entry));
+      int *lookup_array = (int*)malloc(sizeof(int) * (max_hash_value + 1));
+      if (duplicates == NULL || lookup_array == NULL)
+        abort();
+#endif
+      duplicate_entry *dup_ptr = duplicates;
+      int *lookup_ptr = lookup_array + max_hash_value + 1;
+
+      /* Mark every slot as unused. */
+      while (lookup_ptr > lookup_array)
+        *--lookup_ptr = DEFAULT_VALUE;
+
+      /* Record each keyword's index and collect the duplicate sets. */
+      for (List_Node *temp = head; temp; temp = temp->next)
+        {
+          int hash_value = temp->hash_value;
+          lookup_array[hash_value] = temp->index;
+          if (option[DEBUG])
+            fprintf (stderr, "keyword = %s, index = %d\n", temp->key, temp->index);
+          /* A key with no links and a hash value unlike its successor's
+             is unique; nothing more to do for it. */
+          if (!temp->link &&
+              (!temp->next || hash_value != temp->next->hash_value))
+            continue;
+#if LARGE_STACK_ARRAYS
+          *dup_ptr = (duplicate_entry) { hash_value, temp->index, 1 };
+#else
+          duplicate_entry _dups;
+          _dups.hash_value = hash_value;
+          _dups.index = temp->index;
+          _dups.count = 1;
+          *dup_ptr = _dups;
+#endif
+
+          for (List_Node *ptr = temp->link; ptr; ptr = ptr->link)
+            {
+              dup_ptr->count++;
+              if (option[DEBUG])
+                fprintf (stderr, "static linked keyword = %s, index = %d\n", ptr->key, ptr->index);
+            }
+
+          /* Absorb the following nodes that share this hash value. */
+          while (temp->next && hash_value == temp->next->hash_value)
+            {
+              temp = temp->next;
+              dup_ptr->count++;
+              if (option[DEBUG])
+                fprintf (stderr, "dynamic linked keyword = %s, index = %d\n", temp->key, temp->index);
+
+              for (List_Node *ptr = temp->link; ptr; ptr = ptr->link)
+                {
+                  dup_ptr->count++;
+                  if (option[DEBUG])
+                    fprintf (stderr, "static linked keyword = %s, index = %d\n", ptr->key, ptr->index);
+                }
+            }
+          dup_ptr++;
+        }
+
+      /* Place an (index, count) pair for every duplicate set. */
+      while (--dup_ptr >= duplicates)
+        {
+          if (option[DEBUG])
+            /* The pointer difference is a ptrdiff_t; convert it for %d. */
+            fprintf (stderr, "dup_ptr[%d]: hash_value = %d, index = %d, count = %d\n",
+                     (int) (dup_ptr - duplicates), dup_ptr->hash_value, dup_ptr->index, dup_ptr->count);
+
+          /* Start searching for available space towards the right part of the lookup array. */
+          int i;
+          for (i = dup_ptr->hash_value; i < max_hash_value; i++)
+            if (lookup_array[i] == DEFAULT_VALUE && lookup_array[i + 1] == DEFAULT_VALUE)
+              {
+                lookup_array[i] = -dup_ptr->index;
+                lookup_array[i + 1] = -dup_ptr->count;
+                lookup_array[dup_ptr->hash_value] = max_hash_value + (i - dup_ptr->hash_value);
+                break;
+              }
+
+          /* If we didn't find it to the right look to the left instead... */
+          if (i == max_hash_value)
+            {
+
+              for (i = dup_ptr->hash_value; i > 0; i--)
+                if (lookup_array[i] == DEFAULT_VALUE && lookup_array[i - 1] == DEFAULT_VALUE)
+                  {
+                    lookup_array[i - 1] = -dup_ptr->index;
+                    lookup_array[i] = -dup_ptr->count;
+                    lookup_array[dup_ptr->hash_value] = -(max_hash_value + (dup_ptr->hash_value - i + 1));
+                    break;
+                  }
+
+              /* We are in *big* trouble if this happens! */
+              assert (i != 0);
+            }
+        }
+
+      /* Find the largest magnitude stored in the table. */
+      int max = INT_MIN;
+      lookup_ptr = lookup_array + max_hash_value + 1;
+      while (lookup_ptr > lookup_array)
+        {
+          int val = abs (*--lookup_ptr);
+          if (max < val)
+            max = val;
+        }
+
+      /* The table holds negative entries, so its element type is signed
+         and the limit for `short' is SHRT_MAX, not USHRT_MAX.
+         NOTE(review): plain `char' is unsigned on some platforms, where
+         it cannot represent the negative entries -- confirm whether
+         "signed char" should be emitted instead. */
+      char *indent = option[GLOBAL] ? "" : " ";
+      printf ("%sstatic %s%s lookup[] =\n%s%s{\n ", indent, option[CONST] ? "const " : "",
+              max <= SCHAR_MAX ? "char" : (max <= SHRT_MAX ? "short" : "int"),
+              indent, indent);
+
+      int count = max;
+
+      /* Calculate maximum number of digits required for the largest magnitude. */
+
+      for (field_width = 2; (count /= 10) > 0; field_width++)
+        ;
+
+      const int max_column = 15;
+      int column = 0;
+
+      for (lookup_ptr = lookup_array;
+           lookup_ptr < lookup_array + max_hash_value + 1;
+           lookup_ptr++)
+        printf ("%*d,%s", field_width, *lookup_ptr, ++column % (max_column - 1) ? "" : "\n ");
+
+      printf ("\n%s%s};\n\n", indent, indent);
+#if !LARGE_STACK_ARRAYS
+      free (duplicates);
+      free (lookup_array);
+#endif
+    }
+}
+/* Generates C code to perform the keyword lookup.
+ Prints the body of the generated recognizer function: an optional
+ length-range check, the call to the hash function, an optional
+ hash-range check, and then either a search through the lookup[]
+ table (when DUP is set and duplicates exist) or a direct comparison
+ against wordlist[key].  The string comparison emitted is chosen by
+ the COMP and STRCASECMP options. */
+
+void
+Key_List::output_lookup_function (void)
+{
+ if (!option[OPTIMIZE]) /* The length bounds check is omitted when OPTIMIZE is set. */
+ printf (" if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)\n {\n");
+ printf (" register int key = %s (str, len);\n\n", option.get_hash_name ());
+ if (!option[OPTIMIZE]) /* Likewise for the hash value bounds check. */
+ printf (" if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)\n");
+ printf (" {\n");
+
+ if (option[DUP] && total_duplicates > 0) /* Duplicates exist: emit the lookup[] indirection. */
+ {
+ printf (" register int index = lookup[key];\n\n"
+ " if (index >= 0 && index < MAX_HASH_VALUE)\n");
+ if (option[OPTIMIZE]) /* OPTIMIZE: return the entry without comparing strings. */
+ printf (" return %swordlist[index];\n", option[TYPE] && option[POINTER] ? "&" : "");
+ else
+ {
+ printf (" {\n"
+ " register %schar *s = wordlist[index]", option[CONST] ? "const " : "");
+ if (array_type != default_array_type) /* Non-default array type: select the key member by name. */
+ printf (".%s", option.get_key_name ());
+
+ printf (";\n\n if (%s%s == *s && !%s)\n return %s;\n }\n",
+ option[LENTABLE] ? "len == lengthtable[key]\n && " : "",
+ option[STRCASECMP] ? "charmap[*str]" : "*str",
+ option[COMP] ? (option[STRCASECMP] ? "strncasecmp (str + 1, s + 1, len - 1)" : "strncmp (str + 1, s + 1, len - 1)")
+ : (option[STRCASECMP] ? "strcasecmp (str + 1, s + 1)" : "strcmp (str + 1, s + 1)"),
+ option[TYPE] && option[POINTER] ? "&wordlist[index]" : "s");
+ printf (" else if (index < 0 && index >= -MAX_HASH_VALUE)\n"
+ " return 0;\n");
+ }
+ printf (" else\n {\n"
+ " register int offset = key + index + (index > 0 ? -MAX_HASH_VALUE : MAX_HASH_VALUE);\n"
+ " register %s%s*base = &wordlist[-lookup[offset]];\n"
+ " register %s%s*ptr = base + -lookup[offset + 1];\n\n"
+ " while (--ptr >= base)\n ",
+ option[CONST] ? "const " : "", struct_tag,
+ option[CONST] ? "const " : "", struct_tag);
+ if (array_type != default_array_type) /* Non-default array type: compare through the key member. */
+ {
+ if (option[COMP])
+ printf ("if (%s == *ptr->%s && !%s (str + 1, ptr->%s + 1, len - 1",
+ option[STRCASECMP] ? "charmap[*str]" : "*str", option.get_key_name (),
+ option[STRCASECMP] ? "strncasecmp" : "strncmp", option.get_key_name ());
+ else
+ printf ("if (%s == *ptr->%s && !%s (str + 1, ptr->%s + 1",
+ option[STRCASECMP] ? "charmap[*str]" : "*str", option.get_key_name (),
+ option[STRCASECMP] ? "strcasecmp" : "strcmp", option.get_key_name ());
+ }
+ else
+ printf (option[STRCASECMP] ? "if (charmap[*str] == **ptr && !%s" : "if (*str == **ptr && !%s",
+ option[COMP]
+ ? (option[STRCASECMP] ? "strncasecmp (str + 1, *ptr + 1, len - 1" : "strncmp (str + 1, *ptr + 1, len - 1")
+ : (option[STRCASECMP] ? "strcasecmp (str + 1, *ptr + 1" : "strcmp (str + 1, *ptr + 1"));
+ printf ("))\n return %sptr;"
+ "\n }\n }\n %s\n}\n", array_type ==
+ default_array_type ? "*" : "", option[OPTIMIZE] ? "" : "}\n return 0;");
+ }
+ else /* Otherwise wordlist[] is indexed directly by the hash key. */
+ {
+ if (option[OPTIMIZE]) /* OPTIMIZE: return the entry without comparing strings. */
+ printf (" return %swordlist[key]", option[TYPE] && option[POINTER] ? "&" : "");
+ else
+ {
+ printf (" register %schar *s = wordlist[key]", option[CONST] ? "const " : "");
+
+ if (array_type != default_array_type) /* Non-default array type: select the key member by name. */
+ printf (".%s", option.get_key_name ());
+
+ printf (";\n\n if (%s%s == *s && !%s)\n return %s",
+ option[LENTABLE] ? "len == lengthtable[key]\n && " : "",
+ option[STRCASECMP] ? "charmap[*str]" : "*str",
+ option[COMP]
+ ? (option[STRCASECMP] ? "strncasecmp (str + 1, s + 1, len - 1)" : "strncmp (str + 1, s + 1, len - 1)")
+ : (option[STRCASECMP] ? "strcasecmp (str + 1, s + 1)" : "strcmp (str + 1, s + 1)"),
+ option[TYPE] && option[POINTER] ? "&wordlist[key]" : "s");
+ }
+ printf (";\n }\n %s\n}\n", option[OPTIMIZE] ? "" : "}\n return 0;");
+ }
+}
+
+/* Output the table and the functions that map upper case into lower case!
+ Prints the 256-entry `charmap' array, in which codes 0101-0132 and
+ 0301-0332 (octal) map to 0141-0172 and 0341-0372 respectively and
+ every other code maps to itself, followed by a static strncasecmp
+ (when COMP is set) or strcasecmp, written as an ANSI prototype or a
+ K&R definition depending on the ANSI option.
+ NOTE(review): the emitted comment's closing delimiter is followed
+ directly by "static char charmap" with no newline in between --
+ presumably unintended, confirm before changing the output.
+ NOTE(review): the emitted functions index charmap with plain char
+ values (cm[*s1]); this looks like it assumes such values are never
+ negative -- confirm. */
+
+void
+Key_List::output_strcasecmp (void)
+{
+ printf ("%s",
+ "/* This array is designed for mapping upper and lower case letter\n"
+ " * together for a case independent comparison. The mappings are\n"
+ " * based upon ascii character sequences.\n */"
+ "static char charmap[] = {\n"
+ " '\\000', '\\001', '\\002', '\\003', '\\004', '\\005', '\\006', '\\007',\n"
+ " '\\010', '\\011', '\\012', '\\013', '\\014', '\\015', '\\016', '\\017',\n"
+ " '\\020', '\\021', '\\022', '\\023', '\\024', '\\025', '\\026', '\\027',\n"
+ " '\\030', '\\031', '\\032', '\\033', '\\034', '\\035', '\\036', '\\037',\n"
+ " '\\040', '\\041', '\\042', '\\043', '\\044', '\\045', '\\046', '\\047',\n"
+ " '\\050', '\\051', '\\052', '\\053', '\\054', '\\055', '\\056', '\\057',\n"
+ " '\\060', '\\061', '\\062', '\\063', '\\064', '\\065', '\\066', '\\067',\n"
+ " '\\070', '\\071', '\\072', '\\073', '\\074', '\\075', '\\076', '\\077',\n"
+ " '\\100', '\\141', '\\142', '\\143', '\\144', '\\145', '\\146', '\\147',\n"
+ " '\\150', '\\151', '\\152', '\\153', '\\154', '\\155', '\\156', '\\157',\n"
+ " '\\160', '\\161', '\\162', '\\163', '\\164', '\\165', '\\166', '\\167',\n"
+ " '\\170', '\\171', '\\172', '\\133', '\\134', '\\135', '\\136', '\\137',\n"
+ " '\\140', '\\141', '\\142', '\\143', '\\144', '\\145', '\\146', '\\147',\n"
+ " '\\150', '\\151', '\\152', '\\153', '\\154', '\\155', '\\156', '\\157',\n"
+ " '\\160', '\\161', '\\162', '\\163', '\\164', '\\165', '\\166', '\\167',\n"
+ " '\\170', '\\171', '\\172', '\\173', '\\174', '\\175', '\\176', '\\177',\n"
+ " '\\200', '\\201', '\\202', '\\203', '\\204', '\\205', '\\206', '\\207',\n"
+ " '\\210', '\\211', '\\212', '\\213', '\\214', '\\215', '\\216', '\\217',\n"
+ " '\\220', '\\221', '\\222', '\\223', '\\224', '\\225', '\\226', '\\227',\n"
+ " '\\230', '\\231', '\\232', '\\233', '\\234', '\\235', '\\236', '\\237',\n"
+ " '\\240', '\\241', '\\242', '\\243', '\\244', '\\245', '\\246', '\\247',\n"
+ " '\\250', '\\251', '\\252', '\\253', '\\254', '\\255', '\\256', '\\257',\n"
+ " '\\260', '\\261', '\\262', '\\263', '\\264', '\\265', '\\266', '\\267',\n"
+ " '\\270', '\\271', '\\272', '\\273', '\\274', '\\275', '\\276', '\\277',\n"
+ " '\\300', '\\341', '\\342', '\\343', '\\344', '\\345', '\\346', '\\347',\n"
+ " '\\350', '\\351', '\\352', '\\353', '\\354', '\\355', '\\356', '\\357',\n"
+ " '\\360', '\\361', '\\362', '\\363', '\\364', '\\365', '\\366', '\\367',\n"
+ " '\\370', '\\371', '\\372', '\\333', '\\334', '\\335', '\\336', '\\337',\n"
+ " '\\340', '\\341', '\\342', '\\343', '\\344', '\\345', '\\346', '\\347',\n"
+ " '\\350', '\\351', '\\352', '\\353', '\\354', '\\355', '\\356', '\\357',\n"
+ " '\\360', '\\361', '\\362', '\\363', '\\364', '\\365', '\\366', '\\367',\n"
+ " '\\370', '\\371', '\\372', '\\373', '\\374', '\\375', '\\376', '\\377',\n};\n\nstatic int\n");
+ if (option[COMP]) /* COMP: emit the length-bounded strncasecmp. */
+ {
+ printf ("%s", option[ANSI]
+ ? "strncasecmp (register char *s1, register char *s2, register int n)"
+ : "strncasecmp (s1, s2, n)\n register char *s1, *s2;\n register int n;");
+ printf ("\n{\n register char *cm = charmap;\n\n while (--n >= 0 && cm[*s1] == cm[*s2++])\n"
+ " if (*s1++ == '\\0')\n return 0;\n"
+ "\n return n < 0 ? 0 : cm[*s1] - cm[*--s2];\n}\n\n");
+ }
+ else /* Otherwise emit the unbounded strcasecmp. */
+ {
+ printf ("%s", option[ANSI]
+ ? "strcasecmp (register char *s1, register char *s2)"
+ : "strcasecmp (s1, s2)\n register char *s1, *s2;");
+ printf ("\n{\n register char *cm = charmap;\n\n while (cm[*s1] == cm[*s2++])\n"
+ " if (*s1++ == '\\0')\n return 0;\n"
+ "\n return cm[*s1] - cm[*--s2];\n}\n\n");
+ }
+}
+
+/* Writes the complete generated source to stdout, driven by the
+   user's Options: the verbatim include section, the optional type
+   declaration, the min/max definitions, the optional case-mapping
+   helpers and C++ class wrapper, the hash function, and finally the
+   key word recognizer function together with its tables.  Any
+   trailing user code is then copied from stdin to stdout. */
+
+void
+Key_List::output (void)
+{
+  printf ("%s\n", include_src);
+
+  /* Output type declaration now, reference it later on.... */
+  if (option[TYPE] && !option[NOTYPE])
+    {
+      printf ("%s;\n", array_type);
+    }
+
+  output_min_max ();
+
+  if (option[STRCASECMP])
+    {
+      output_strcasecmp ();
+    }
+
+  if (option[CPLUSPLUS])
+    {
+      printf ("class %s\n{\nprivate:\n"
+              " static unsigned int hash (const char *str, int len);\npublic:\n"
+              " static %s%s%s (const char *str, int len);\n};\n\n",
+              option.get_class_name (), option[CONST] ? "const " : "",
+              return_type, option.get_function_name ());
+    }
+
+  output_hash_function ();
+
+  /* File-scope tables are emitted ahead of the recognizer function. */
+  if (option[GLOBAL])
+    {
+      if (option[SWITCH])
+        {
+          if (option[LENTABLE] && option[DUP])
+            output_keylength_table ();
+          if (option[POINTER] && option[TYPE])
+            output_keyword_table ();
+        }
+      else
+        {
+          if (option[LENTABLE])
+            output_keylength_table ();
+          output_keyword_table ();
+          output_lookup_array ();
+        }
+    }
+
+  /* Use the inline keyword to remove function overhead. */
+  if (option[GNU])
+    {
+      printf ("#ifdef __GNUC__\ninline\n#endif\n");
+    }
+
+  printf ("%s%s\n", option[CONST] ? "const " : "", return_type);
+  if (option[CPLUSPLUS])
+    {
+      printf ("%s::", option.get_class_name ());
+    }
+
+  if (option[ANSI])
+    printf ("%s (register const char *str, register int len)\n{\n",
+            option.get_function_name ());
+  else
+    printf ("%s (str, len)\n register char *str;\n register unsigned int len;\n{\n",
+            option.get_function_name ());
+
+  if (option[ENUM] && !option[GLOBAL])
+    {
+      printf (" enum\n {\n"
+              " TOTAL_KEYWORDS = %d,\n"
+              " MIN_WORD_LENGTH = %d,\n"
+              " MAX_WORD_LENGTH = %d,\n"
+              " MIN_HASH_VALUE = %d,\n"
+              " MAX_HASH_VALUE = %d,\n"
+              " HASH_VALUE_RANGE = %d,\n"
+              " DUPLICATES = %d\n };\n\n",
+              total_keys, min_key_len, max_key_len, min_hash_value,
+              max_hash_value, max_hash_value - min_hash_value + 1,
+              total_duplicates ? total_duplicates + 1 : 0);
+    }
+
+  if (option[SWITCH])
+    {
+      /* Use the switch in place of lookup table. */
+      if (!option[GLOBAL])
+        {
+          if (option[LENTABLE] && option[DUP])
+            output_keylength_table ();
+          if (option[POINTER] && option[TYPE])
+            output_keyword_table ();
+        }
+      output_switch ();
+    }
+  else
+    {
+      /* Use the lookup table, in place of switch. */
+      if (!option[GLOBAL])
+        {
+          if (option[LENTABLE])
+            output_keylength_table ();
+          output_keyword_table ();
+          output_lookup_array ();
+        }
+      output_lookup_function ();
+    }
+
+  /* Pass any remaining input through unchanged. */
+  if (additional_code)
+    {
+      int c;
+
+      while ((c = getchar ()) != EOF)
+        putchar (c);
+    }
+
+  fflush (stdout);
+}
+
+/* Sorts the keys by hash value: selects the hash-value ordering
+   (and deselects the occurrence ordering) and then merge-sorts the
+   list, replacing HEAD with the head of the sorted list. */
+
+void
+Key_List::sort (void)
+{
+  occurrence_sort = 0;
+  hash_sort = 1;
+
+  List_Node *sorted = merge_sort (head);
+  head = sorted;
+}
+
+/* Writes one line per list node to stderr, preceded by a column
+   header: hash value, key length, index, character set and keyword. */
+
+void
+Key_List::dump ()
+{
+  int keysig_width = option.get_max_keysig_size ();
+
+  fprintf (stderr, "\nList contents are:\n(hash value, key length, index, %*s, keyword):\n",
+           keysig_width, "char_set");
+
+  List_Node *node = head;
+
+  while (node)
+    {
+      fprintf (stderr, "%11d,%11d,%6d, %*s, %s\n",
+               node->hash_value, node->length, node->index,
+               keysig_width, node->char_set, node->key);
+      node = node->next;
+    }
+}
+
+/* Simple-minded constructor action here...
+   Puts every data member into a defined state.  The key length
+   extremes start at INT_MIN / INT_MAX so that the first key read
+   replaces them; the type strings start at their defaults. */
+
+Key_List::Key_List (void)
+{
+  total_keys = 1;
+  max_key_len = INT_MIN;
+  min_key_len = INT_MAX;
+  return_type = default_return_type;
+  array_type = struct_tag = default_array_type;
+  head = 0;
+  total_duplicates = 0;
+  additional_code = 0;
+
+  /* The members below were previously left indeterminate, so reading
+     them (e.g. through keyword_list_length) before they had been
+     assigned elsewhere yielded garbage. */
+  include_src = 0;
+  list_len = 0;
+  min_hash_value = 0;
+  max_hash_value = 0;
+  occurrence_sort = 0;
+  hash_sort = 0;
+}
+
+/* Accessor for LIST_LEN, the length of the entire key list. */
+
+int
+Key_List::keyword_list_length (void)
+{
+  return this->list_len;
+}
+
+/* Accessor for MAX_KEY_LEN, the length of the longest key read. */
+
+int
+Key_List::max_key_length (void)
+{
+  return this->max_key_len;
+}
+
diff --git a/apps/gperf/src/Key_List.h b/apps/gperf/src/Key_List.h
new file mode 100644
index 00000000000..14276eb975d
--- /dev/null
+++ b/apps/gperf/src/Key_List.h
@@ -0,0 +1,116 @@
+/* -*- C++ -*- */
+// @(#)Key_List.h 1.1 10/18/96
+
+/* This may look like C code, but it is really -*- C++ -*- */
+
+/* Data and function member declarations for the keyword list class.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+// The key word list is a useful abstraction that keeps track of
+// various pieces of information that enable fast generation of
+// the Gen_Perf.hash function. A Key_List is a singly-linked list of
+// List_Nodes.
+
+#ifndef key_list_h
+#define key_list_h 1
+
+#include "Options.h"
+#include "List_Node.h"
+
+class Key_List // Singly-linked list of List_Nodes plus the routines that print the generated hash and lookup code.
+{
+public:
+ Key_List (void); // Puts the list into its initial, empty state.
+ ~Key_List (void);
+ int keyword_list_length (void); // Returns list_len.
+ int max_key_length (void); // Returns max_key_len.
+ void reorder (void);
+ void sort (void); // Sorts the keys by hash value.
+ void read_keys (void);
+ void output (void); // Prints the hash function and the key word recognizer function to stdout.
+
+ List_Node *head;
+ // Points to the head of the linked list.
+
+ int total_duplicates;
+ // Total number of duplicate hash values.
+
+private:
+ static int get_occurrence (List_Node *ptr);
+ static int strcspn (const char *s, const char *reject);
+ static int already_determined (List_Node *ptr);
+ static void set_determined (List_Node *ptr);
+ void output_min_max (void);
+ void output_switch (void);
+ void output_keyword_table (void); // Prints the wordlist[] array and assigns each node's index.
+ void output_keylength_table (void); // Prints the lengthtable[] array of keyword lengths.
+ void output_hash_function (void); // Prints the asso_values[] table and the hash function body.
+ void output_lookup_function (void); // Prints the code that performs the keyword lookup.
+ void output_lookup_array (void); // Prints the lookup[] table; does nothing without duplicates.
+ void output_strcasecmp (void); // Prints the charmap[] table and a case-insensitive compare function.
+ void set_output_types (void);
+ void dump (void); // Dumps the key list to stderr.
+ char *get_array_type (void);
+ char *save_include_src (void);
+ char *get_special_input (char delimiter);
+ List_Node *merge (List_Node *list1, List_Node *list2);
+ List_Node *merge_sort (List_Node *head);
+
+ char *array_type;
+ // Pointer to the type for word list.
+
+ char *return_type;
+ // Pointer to return type for lookup function.
+
+ char *struct_tag;
+ // Shorthand for user-defined struct tag type.
+
+ char *include_src;
+ // C source code to be included verbatim.
+
+ int max_key_len;
+ // Maximum length of the longest keyword.
+
+ int min_key_len;
+ // Minimum length of the shortest keyword.
+
+ int min_hash_value;
+ // Minimum hash value for all keywords.
+
+ int max_hash_value;
+ // Maximum hash value for all keywords.
+
+ int occurrence_sort;
+ // True if sorting by occurrence.
+
+ int hash_sort;
+ // True if sorting by hash value.
+
+ int additional_code;
+ // True if any additional C code is included.
+
+ int list_len;
+ // Length of head's Key_List, not counting duplicates.
+
+ int total_keys;
+ // Total number of keys, counting duplicates.
+};
+#endif
diff --git a/apps/gperf/src/List_Node.cpp b/apps/gperf/src/List_Node.cpp
new file mode 100644
index 00000000000..d72cc699c13
--- /dev/null
+++ b/apps/gperf/src/List_Node.cpp
@@ -0,0 +1,110 @@
+/* Creates and initializes a new list node.
+// @(#)List_Node.cpp 1.1 10/18/96
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "Vectors.h"
+#include "List_Node.h"
+
+/* Defined as a macro in string.h on some systems, which causes
+ conflicts. */
+#undef index
+
+/* Sorts the key set alphabetically to speed up subsequent operations.
+   Uses insertion sort since the set is probably quite small.
+
+   BASE points to the LEN characters to be sorted in place, in
+   ascending order of their char values.  A LEN of 0 or 1 leaves
+   BASE untouched. */
+
+inline void
+List_Node::set_sort (char *base, int len)
+{
+  for (int i = 1; i < len; i++)
+    {
+      char tmp = base[i];
+      /* The position is an int: a char index would overflow once the
+         key set grows longer than CHAR_MAX characters. */
+      int curr = i;
+
+      /* Shift the larger elements of the sorted prefix one place up. */
+      for (; curr > 0 && tmp < base[curr - 1]; curr--)
+        base[curr] = base[curr - 1];
+
+      base[curr] = tmp;
+    }
+}
+
+/* Initializes a List_Node.  This requires obtaining memory for the
+   CHAR_SET, initializing it using the information stored in the
+   KEY_POSITIONS array in Options, and checking for simple errors.
+   It's important to note that KEY and REST are both pointers to
+   different offsets into the same block of dynamic memory pointed to
+   by parameter K.  The data member REST is used to store any
+   additional fields of the input file (it is set to the "" string if
+   Option[TYPE] is not enabled).  This is useful if the user wishes to
+   incorporate a lookup structure, rather than just an array of keys.
+
+   K is the keyword text and LEN its length; K[LEN] is overwritten
+   with a terminating null.  When STRCASECMP is enabled the keyword is
+   lower-cased in place.  Every character stored into CHAR_SET also
+   increments its entry in Vectors::occurrences.  INDEX, HASH_VALUE
+   and OCCURRENCE start at 0; LINK and NEXT start out null. */
+
+List_Node::List_Node (char *k, int len)
+  : link (0),
+    next (0),
+    key (k),
+    rest (option[TYPE] ? k + len + 1 : ""),
+    length (len),
+    hash_value (0),
+    occurrence (0),
+    index (0)
+{
+  char *ptr = new char[(option[ALLCHARS] ? len : option.get_max_keysig_size ()) + 1];
+  char_set = ptr;
+  k[len] = '\0'; /* Null terminate KEY to separate it from REST. */
+
+  /* Lower case if STRCASECMP option is enabled.  The <ctype.h>
+     functions require an argument representable as unsigned char,
+     so plain char values are converted before the call. */
+  if (option[STRCASECMP])
+    for (char *p = k; *p; p++)
+      if (isupper ((unsigned char) *p))
+        *p = (char) tolower ((unsigned char) *p);
+
+  if (option[ALLCHARS]) /* Use all the character position in the KEY. */
+    for (; *k; k++, ptr++)
+      ++Vectors::occurrences[*ptr = *k];
+  else /* Only use those character positions specified by the user. */
+    {
+      int i;
+
+      /* Iterate thru the list of key_positions, initializing occurrences table
+         and char_set (via char * pointer ptr). */
+
+      for (option.reset (); (i = option.get ()) != EOS; )
+        {
+          if (i == WORD_END) /* Special notation for last KEY position, i.e. '$'. */
+            *ptr = key[len - 1];
+          else if (i <= len) /* Within range of KEY length, so we'll keep it. */
+            *ptr = key[i - 1];
+          else /* Out of range of KEY length, so we'll just skip it. */
+            continue;
+          ++Vectors::occurrences[*ptr++];
+        }
+
+      /* Didn't get any hits and user doesn't want to consider the
+         keylength, so there are essentially no usable hash positions! */
+      if (ptr == char_set && option[NOLENGTH])
+        ACE_ERROR ((LM_ERROR, "Can't hash keyword %s with chosen key positions.\n%a", key, 1));
+    }
+  *ptr = '\0'; /* Terminate this bastard.... */
+  /* Sort the KEY_SET items alphabetically. */
+  set_sort (char_set, ptr - char_set);
+}
diff --git a/apps/gperf/src/List_Node.h b/apps/gperf/src/List_Node.h
new file mode 100644
index 00000000000..0cb512f0894
--- /dev/null
+++ b/apps/gperf/src/List_Node.h
@@ -0,0 +1,65 @@
+/* -*- C++ -*- */
+// @(#)List_Node.h 1.1 10/18/96
+
+/* This may look like C code, but it is really -*- C++ -*- */
+
+/* Data and function members for defining values and operations of a list node.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#ifndef list_node_h
+#define list_node_h 1
+
+#include "Options.h"
+
+struct List_Node // One keyword of the key list together with its hashing data.
+{
+ List_Node (char *key, int len); // Builds a node for the keyword KEY of length LEN.
+ static void set_sort (char *base, int len); // Sorts the LEN characters at BASE in place.
+
+ List_Node *link;
+ // Chain of further keys that have an identical KEY_SET; 0 when there are none.
+
+ List_Node *next;
+ // Points to next element on the list.
+
+ char *key;
+ // Each keyword string stored here.
+
+ char *rest;
+ // Additional information for building hash function.
+
+ char *char_set;
+ // Set of characters to hash, specified by user.
+
+ int length;
+ // Length of the key.
+
+ int hash_value;
+ // Hash value for the key.
+
+ int occurrence;
+ // A metric for frequency of key set occurrences.
+
+ int index;
+ // Position of this node relative to other nodes.
+};
+
+#endif
diff --git a/apps/gperf/src/Makefile b/apps/gperf/src/Makefile
new file mode 100644
index 00000000000..ca6221f7156
--- /dev/null
+++ b/apps/gperf/src/Makefile
@@ -0,0 +1,155 @@
+#----------------------------------------------------------------------------
+# @(#)Makefile 1.1 10/18/96
+#
+# Makefile for GPERF release
+#----------------------------------------------------------------------------
+
+# Name of the executable and of the static library for this directory.
+BIN = gperf
+LIB = libGperf.a
+
+# Base names (no extension) of the sources; LSRC and LOBJ below are
+# derived from this list.
+FILES = new \
+ Options \
+ Iterator \
+ Gen_Perf \
+ Key_List \
+ List_Node \
+ Hash_Table \
+ Bool_Array \
+ Vectors \
+ Version
+
+# Source and object file names: each entry of FILES with .cpp / .o appended.
+LSRC = $(addsuffix .cpp,$(FILES))
+LOBJ = $(addsuffix .o,$(FILES))
+
+# Link against the library named by LIB above.
+LDLIBS = -lGperf
+
+# LDLIBS with $(VAR) appended to every word.  VAR is not set in this
+# file; presumably it comes from the makefiles included below.
+VLDLIBS = $(LDLIBS:%=%$(VAR))
+
+# Targets to build.  VLIB and VBIN are not defined in this file;
+# presumably the included makefiles derive them from LIB and BIN.
+BUILD = $(VLIB) $(VBIN)
+
+#----------------------------------------------------------------------------
+# Include macros and targets
+#----------------------------------------------------------------------------
+
+include $(WRAPPER_ROOT)/include/makeinclude/wrapper_macros.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/macros.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/rules.common.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/rules.nonested.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/rules.lib.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/rules.bin.GNU
+include $(WRAPPER_ROOT)/include/makeinclude/rules.local.GNU
+
+# DO NOT DELETE THIS LINE -- g++dep uses it.
+# DO NOT PUT ANYTHING AFTER THIS LINE, IT WILL GO AWAY.
+
+.obj/new.o .shobj/new.so: new.cpp Options.h \
+ $(WRAPPER_ROOT)/ace/Log_Msg.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.h \
+ $(WRAPPER_ROOT)/ace/ACE.h \
+ $(WRAPPER_ROOT)/ace/OS.h \
+ $(WRAPPER_ROOT)/ace/Time_Value.h \
+ $(WRAPPER_ROOT)/ace/config.h \
+ $(WRAPPER_ROOT)/ace/Trace.h \
+ $(WRAPPER_ROOT)/ace/ACE.i \
+ $(WRAPPER_ROOT)/ace/Log_Priority.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.i
+.obj/Options.o .shobj/Options.so: Options.cpp \
+ $(WRAPPER_ROOT)/ace/Get_Opt.h \
+ $(WRAPPER_ROOT)/ace/ACE.h \
+ $(WRAPPER_ROOT)/ace/OS.h \
+ $(WRAPPER_ROOT)/ace/Time_Value.h \
+ $(WRAPPER_ROOT)/ace/config.h \
+ $(WRAPPER_ROOT)/ace/Trace.h \
+ $(WRAPPER_ROOT)/ace/ACE.i \
+ $(WRAPPER_ROOT)/ace/Log_Msg.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.h \
+ $(WRAPPER_ROOT)/ace/Log_Priority.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.i \
+ Options.h Iterator.h
+.obj/Iterator.o .shobj/Iterator.so: Iterator.cpp Iterator.h Options.h \
+ $(WRAPPER_ROOT)/ace/Log_Msg.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.h \
+ $(WRAPPER_ROOT)/ace/ACE.h \
+ $(WRAPPER_ROOT)/ace/OS.h \
+ $(WRAPPER_ROOT)/ace/Time_Value.h \
+ $(WRAPPER_ROOT)/ace/config.h \
+ $(WRAPPER_ROOT)/ace/Trace.h \
+ $(WRAPPER_ROOT)/ace/ACE.i \
+ $(WRAPPER_ROOT)/ace/Log_Priority.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.i
+.obj/Gen_Perf.o .shobj/Gen_Perf.so: Gen_Perf.cpp Gen_Perf.h Options.h \
+ $(WRAPPER_ROOT)/ace/Log_Msg.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.h \
+ $(WRAPPER_ROOT)/ace/ACE.h \
+ $(WRAPPER_ROOT)/ace/OS.h \
+ $(WRAPPER_ROOT)/ace/Time_Value.h \
+ $(WRAPPER_ROOT)/ace/config.h \
+ $(WRAPPER_ROOT)/ace/Trace.h \
+ $(WRAPPER_ROOT)/ace/ACE.i \
+ $(WRAPPER_ROOT)/ace/Log_Priority.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.i \
+ Key_List.h List_Node.h Bool_Array.h
+.obj/Key_List.o .shobj/Key_List.so: Key_List.cpp \
+ $(WRAPPER_ROOT)/ace/Read_Buffer.h \
+ $(WRAPPER_ROOT)/ace/ACE.h \
+ $(WRAPPER_ROOT)/ace/OS.h \
+ $(WRAPPER_ROOT)/ace/Time_Value.h \
+ $(WRAPPER_ROOT)/ace/config.h \
+ $(WRAPPER_ROOT)/ace/Trace.h \
+ $(WRAPPER_ROOT)/ace/ACE.i \
+ $(WRAPPER_ROOT)/ace/Log_Msg.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.h \
+ $(WRAPPER_ROOT)/ace/Log_Priority.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.i \
+ $(WRAPPER_ROOT)/ace/Malloc.h \
+ $(WRAPPER_ROOT)/ace/Malloc_T.h \
+ $(WRAPPER_ROOT)/ace/Synch.h \
+ $(WRAPPER_ROOT)/ace/Synch_T.h \
+ $(WRAPPER_ROOT)/ace/Memory_Pool.h \
+ $(WRAPPER_ROOT)/ace/Event_Handler.h \
+ $(WRAPPER_ROOT)/ace/Signal.h \
+ $(WRAPPER_ROOT)/ace/Set.h \
+ $(WRAPPER_ROOT)/ace/Mem_Map.h \
+ $(WRAPPER_ROOT)/ace/SV_Semaphore_Complex.h \
+ $(WRAPPER_ROOT)/ace/SV_Semaphore_Simple.h \
+ $(WRAPPER_ROOT)/ace/SV_Semaphore_Simple.i \
+ $(WRAPPER_ROOT)/ace/SV_Semaphore_Complex.i \
+ $(WRAPPER_ROOT)/ace/Read_Buffer.i \
+ Hash_Table.h Options.h List_Node.h Vectors.h Key_List.h
+.obj/List_Node.o .shobj/List_Node.so: List_Node.cpp Vectors.h List_Node.h Options.h \
+ $(WRAPPER_ROOT)/ace/Log_Msg.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.h \
+ $(WRAPPER_ROOT)/ace/ACE.h \
+ $(WRAPPER_ROOT)/ace/OS.h \
+ $(WRAPPER_ROOT)/ace/Time_Value.h \
+ $(WRAPPER_ROOT)/ace/config.h \
+ $(WRAPPER_ROOT)/ace/Trace.h \
+ $(WRAPPER_ROOT)/ace/ACE.i \
+ $(WRAPPER_ROOT)/ace/Log_Priority.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.i
+.obj/Hash_Table.o .shobj/Hash_Table.so: Hash_Table.cpp \
+ $(WRAPPER_ROOT)/ace/ACE.h \
+ $(WRAPPER_ROOT)/ace/OS.h \
+ $(WRAPPER_ROOT)/ace/Time_Value.h \
+ $(WRAPPER_ROOT)/ace/config.h \
+ $(WRAPPER_ROOT)/ace/Trace.h \
+ $(WRAPPER_ROOT)/ace/ACE.i \
+ $(WRAPPER_ROOT)/ace/Log_Msg.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.h \
+ $(WRAPPER_ROOT)/ace/Log_Priority.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.i \
+ Hash_Table.h Options.h List_Node.h
+.obj/Bool_Array.o .shobj/Bool_Array.so: Bool_Array.cpp Bool_Array.h Options.h \
+ $(WRAPPER_ROOT)/ace/Log_Msg.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.h \
+ $(WRAPPER_ROOT)/ace/ACE.h \
+ $(WRAPPER_ROOT)/ace/OS.h \
+ $(WRAPPER_ROOT)/ace/Time_Value.h \
+ $(WRAPPER_ROOT)/ace/config.h \
+ $(WRAPPER_ROOT)/ace/Trace.h \
+ $(WRAPPER_ROOT)/ace/ACE.i \
+ $(WRAPPER_ROOT)/ace/Log_Priority.h \
+ $(WRAPPER_ROOT)/ace/Log_Record.i
+.obj/Version.o .shobj/Version.so: Version.cpp
+
+# IF YOU PUT ANYTHING HERE IT WILL GO AWAY
diff --git a/apps/gperf/src/Options.cpp b/apps/gperf/src/Options.cpp
new file mode 100644
index 00000000000..184187b5a4a
--- /dev/null
+++ b/apps/gperf/src/Options.cpp
@@ -0,0 +1,616 @@
+/* Handles parsing the Options provided by the user.
+// @(#)Options.cpp 1.1 10/18/96
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "ace/Get_Opt.h"
+#include "Options.h"
+#include "Iterator.h"
+
+/* Global option coordinator for the entire program. */
+Options option;
+
+/* Current program version (defined in another translation unit). */
+extern char *version_string;
+
+/* Size to jump on a collision. */
+static const int DEFAULT_JUMP_VALUE = 5;
+
+/* Default name for generated lookup function. */
+static const char *const DEFAULT_NAME = "in_word_set";
+
+/* Default name for the key component. */
+static const char *const DEFAULT_KEY = "name";
+
+/* Default name for the generated class. */
+static const char *const DEFAULT_CLASS_NAME = "Perfect_Hash";
+
+/* Default name for generated hash function. */
+static const char *const DEFAULT_HASH_NAME = "hash";
+
+/* Default delimiters that separate keywords from their attributes. */
+static const char *const DEFAULT_DELIMITERS = ",\n";
+
+/* Definitions of the static data members declared in class Options
+   (Options.h).  Having static storage they start out zero; the Options
+   constructor below then assigns the defaults for most of them
+   (key_pos, argument_count and argument_vector are only set later, by
+   reset () and operator() respectively). */
+int Options::option_word;
+int Options::total_switches;
+int Options::total_keysig_size;
+int Options::size;
+int Options::key_pos;
+int Options::jump;
+int Options::initial_asso_value;
+int Options::argument_count;
+int Options::iterations;
+char **Options::argument_vector;
+const char *Options::function_name;
+const char *Options::key_name;
+const char *Options::class_name;
+const char *Options::hash_name;
+const char *Options::delimiters;
+char Options::key_positions[MAX_KEY_POS];
+
+/* Logs a one-line synopsis of the accepted command-line switches at
+   LM_ERROR priority, followed by a hint to run with -h.
+
+   The synopsis mirrors the option string handed to ACE_Get_Opt in
+   Options::operator(): every switch accepted there is listed here, and
+   the switches that take an argument (e, f, H, i, j, k, K, L, N, s, S,
+   Z) are shown with a <placeholder>.  Keep the two in sync.
+
+   %n is an ACE_Log_Msg directive; presumably it expands to the program
+   name registered through ACE_LOG_MSG->open () -- TODO confirm. */
+
+inline void
+Options::usage (void)
+{
+  ACE_ERROR ((LM_ERROR, "Usage: %n [-acCdDe<delimiters>Ef<num>gGhH<hashname>i<init>Ij<jump>k<keys>K<keyname>lL<language>nN<function name>oOprs<size>S<switches>tTvZ<class name>].\n"
+              "(type %n -h for help)\n"));
+}
+
+/* Writes the recorded command line to standard output wrapped in a C
+   comment: the opening text, then every saved argument followed by a
+   single blank, then the closing text.  No newline is emitted. */
+
+void
+Options::print_options (void)
+{
+  printf ("/* Command-line: ");
+
+  /* Walk the saved argument vector with a moving pointer. */
+  char **arg = argument_vector;
+
+  for (int remaining = argument_count; remaining > 0; --remaining)
+    printf ("%s ", *arg++);
+
+  printf (" */");
+}
+
+/* Sorts the LEN key positions in BASE into DESCENDING order, in place.
+   Later stages rely on this reverse ordering.
+   A plain insertion sort is used because the input is usually almost
+   ordered already.
+   Returns 1 when all positions are distinct.  Returns 0 as soon as two
+   equal positions are met; BASE is then left partially shifted and
+   must not be used. */
+
+inline int
+Options::key_sort (char *base, int len)
+{
+  const int last = len - 1;
+
+  for (int sorted = 0; sorted < last; ++sorted)
+    {
+      /* Element being inserted into the already-ordered prefix. */
+      int slot = sorted + 1;
+      const int pending = base[slot];
+
+      /* Shift smaller-or-equal neighbours one place to the right. */
+      while (slot > 0 && pending >= base[slot - 1])
+        {
+          base[slot] = base[slot - 1];
+
+          if (base[slot] == pending) /* Duplicate key position. */
+            return 0;
+
+          --slot;
+        }
+
+      base[slot] = pending;
+    }
+
+  return 1;
+}
+
+/* Installs the default settings: C output, the default key positions
+   (first and last character), the default identifiers for the
+   generated code, one switch statement and a size multiplier of 1. */
+
+Options::Options (void)
+{
+  /* Identifiers used in the generated code. */
+  function_name = DEFAULT_NAME;
+  hash_name = DEFAULT_HASH_NAME;
+  key_name = DEFAULT_KEY;
+  class_name = DEFAULT_CLASS_NAME;
+  delimiters = DEFAULT_DELIMITERS;
+
+  /* Default key signature: first and last character, EOS-terminated. */
+  key_positions[0] = WORD_START;
+  key_positions[1] = WORD_END;
+  key_positions[2] = EOS;
+  total_keysig_size = 2;
+
+  /* Flags and numeric tuning values. */
+  option_word = DEFAULTCHARS | C;
+  jump = DEFAULT_JUMP_VALUE;
+  size = 1;
+  total_switches = 1;
+  iterations = 0;
+  initial_asso_value = 0;
+}
+
+/* When the DEBUG flag is set, writes the state of every option flag,
+   the numeric and string settings and the selected key positions to
+   stderr.  Does nothing otherwise. */
+
+Options::~Options (void)
+{
+  if (! (option_word & DEBUG))
+    return;
+
+  const char *const on = "enabled";
+  const char *const off = "disabled";
+
+  fprintf (stderr, "\ndumping Options:\nDEBUG is.......: %s\nORDER is.......: %s"
+           "\nANSI is........: %s\nTYPE is........: %s\nGNU is.........: %s"
+           "\nRANDOM is......: %s\nDEFAULTCHARS is: %s\nSWITCH is......: %s"
+           "\nPOINTER is.....: %s\nNOLENGTH is....: %s\nLENTABLE is....: %s"
+           "\nDUP is.........: %s\nFAST is........: %s\nCOMP is.....: %s"
+           "\nNOTYPE is......: %s\nGLOBAL is......: %s\nCONST is....: %s"
+           "\nCPLUSPLUS is...: %s\nC is...........: %s\nENUM is.....: %s"
+           "\nSTRCASECMP is...: %s\nOPTIMIZE is...........: %s"
+           "\niterations = %d\nlookup function name = %s\nhash function name = %s"
+           "\nkey name = %s\njump value = %d\nmax associcated value = %d"
+           "\ninitial associated value = %d\ndelimiters = %s\nnumber of switch statements = %d\n",
+           option_word & DEBUG ? on : off,
+           option_word & ORDER ? on : off,
+           option_word & ANSI ? on : off,
+           option_word & TYPE ? on : off,
+           option_word & GNU ? on : off,
+           option_word & RANDOM ? on : off,
+           option_word & DEFAULTCHARS ? on : off,
+           option_word & SWITCH ? on : off,
+           option_word & POINTER ? on : off,
+           option_word & NOLENGTH ? on : off,
+           option_word & LENTABLE ? on : off,
+           option_word & DUP ? on : off,
+           option_word & FAST ? on : off,
+           option_word & COMP ? on : off,
+           option_word & NOTYPE ? on : off,
+           option_word & GLOBAL ? on : off,
+           option_word & CONST ? on : off,
+           option_word & CPLUSPLUS ? on : off,
+           option_word & C ? on : off,
+           option_word & ENUM ? on : off,
+           option_word & STRCASECMP ? on : off,
+           option_word & OPTIMIZE ? on : off,
+           iterations, function_name, hash_name, key_name, jump, size - 1,
+           initial_asso_value, delimiters, total_switches);
+
+  if (option_word & ALLCHARS)
+    fprintf (stderr, "all characters are used in the hash function\n");
+
+  fprintf (stderr, "maximum keysig size = %d\nkey positions are: \n",
+           total_keysig_size);
+
+  /* The list is terminated by EOS; WORD_END is shown the way the
+     user writes it on the command line. */
+  for (const char *pos = key_positions; *pos != EOS; pos++)
+    {
+      if (*pos == WORD_END)
+        fprintf (stderr, "$\n");
+      else
+        fprintf (stderr, "%d\n", *pos);
+    }
+
+  fprintf (stderr, "finished dumping Options\n");
+}
+
+
+/* Parses the command line and records the result in the static option
+   state (option_word and the associated values).
+
+   ARGC/ARGV are the arguments of main ().  They are also saved in
+   argument_count/argument_vector so print_options () can echo them.
+   After the switches, an optional keyword file name is accepted;
+   stdin is re-opened on it.  Anything after that is an error.
+
+   Errors are reported through ACE_ERROR.  Fatal ones use the
+   "%e%a", usage, 1 directive pair; presumably this prints the usage
+   text and terminates the program with status 1 -- TODO confirm
+   against the ACE_Log_Msg version in use.  The code below
+   nevertheless never relies on the log call not returning: every
+   case ends in an explicit break.
+
+   Points worth knowing:
+   - -i with a negative value warns and falls back to 0.
+   - -j with an even value warns and then adds 1; the increment is
+     done outside the logging call so that it happens even if the
+     logging macro does not evaluate its arguments.
+   - -k stores at most MAX_KEY_POS - 1 positions, because
+     key_positions has MAX_KEY_POS elements and one is needed for the
+     EOS terminator; more positions than that is a usage error. */
+
+void
+Options::operator() (int argc, char *argv[])
+{
+  ACE_LOG_MSG->open (argv[0]);
+
+  ACE_Get_Opt getopt (argc, argv, "adcCDe:Ef:gGhH:i:Ij:k:K:lL:nN:oOprs:S:tTvZ:");
+  int option_char;
+
+  argument_count = argc;
+  argument_vector = argv;
+
+  while ((option_char = getopt ()) != -1)
+    {
+      switch (option_char)
+        {
+        case 'a': /* Generated coded uses the ANSI prototype format. */
+          {
+            option_word |= ANSI;
+            break;
+          }
+        case 'c': /* Generate strncmp rather than strcmp. */
+          {
+            option_word |= COMP;
+            break;
+          }
+        case 'C': /* Make the generated tables readonly (const). */
+          {
+            option_word |= CONST;
+            break;
+          }
+        case 'd': /* Enable debugging option. */
+          {
+            option_word |= DEBUG;
+            ACE_ERROR ((LM_ERROR, "Starting program %n, version %s, with debugging on.\n",
+                        version_string));
+            break;
+          }
+        case 'D': /* Enable duplicate option. */
+          {
+            option_word |= DUP;
+            break;
+          }
+        case 'e': /* Allows user to provide keyword/attribute separator */
+          {
+            option.delimiters = getopt.optarg;
+            break;
+          }
+        case 'E': /* Use an enum rather than defines for constants. */
+          {
+            option_word |= ENUM;
+            break;
+          }
+        case 'f': /* Generate the hash table ``fast.'' */
+          {
+            option_word |= FAST;
+            if ((iterations = atoi (getopt.optarg)) < 0)
+              {
+                ACE_ERROR ((LM_ERROR, "iterations value must not be negative, assuming 0\n"));
+                iterations = 0;
+              }
+            break;
+          }
+        case 'g': /* Use the ``inline'' keyword for generated sub-routines. */
+          {
+            option_word |= GNU;
+            break;
+          }
+        case 'G': /* Make the keyword table a global variable. */
+          {
+            option_word |= GLOBAL;
+            break;
+          }
+        case 'h': /* Displays a list of helpful Options to the user. */
+          {
+            ACE_ERROR ((LM_ERROR,
+                        "-a\tGenerate ANSI standard C output code, i.e., function prototypes.\n"
+                        "-c\tGenerate comparison code using strncmp rather than strcmp.\n"
+                        "-C\tMake the contents of generated lookup tables constant, i.e., readonly.\n"
+                        "-d\tEnables the debugging option (produces verbose output to the standard error).\n"
+                        "-D\tHandle keywords that hash to duplicate values. This is useful\n"
+                        "\tfor certain highly redundant keyword sets. It enables the -S option.\n"
+                        "-e\tAllow user to provide a string containing delimiters used to separate\n"
+                        "\tkeywords from their attributes. Default is \",\\n\"\n"
+                        "-E\tDefine constant values using an enum local to the lookup function\n"
+                        "\trather than with defines\n"
+                        "-f\tGenerate the gen-perf.hash function ``fast.'' This decreases GPERF's\n"
+                        "\trunning time at the cost of minimizing generated table-size.\n"
+                        "\tThe numeric argument represents the number of times to iterate when\n"
+                        "\tresolving a collision. `0' means ``iterate by the number of keywords.''\n"
+                        "-g\tAssume a GNU compiler, e.g., g++ or gcc. This makes all generated\n"
+                        "\troutines use the ``inline'' keyword to remove cost of function calls.\n"
+                        "-G\tGenerate the static table of keywords as a static global variable,\n"
+                        "\trather than hiding it inside of the lookup function (which is the\n"
+                        "\tdefault behavior).\n"
+                        "-h\tPrints this mesage.\n"
+                        "-H\tAllow user to specify name of generated hash function. Default\n"
+                        "\tis `hash'.\n"
+                        "-i\tProvide an initial value for the associate values array. Default is 0.\n"
+                        "\tSetting this value larger helps inflate the size of the final table.\n"
+                        "-I\tGenerate comparison code using case insensitive string comparison, e.g.,\n"
+                        "\tstrncasecmp or strcasecmp.\n"
+                        "-j\tAffects the ``jump value,'' i.e., how far to advance the associated\n"
+                        "\tcharacter value upon collisions. Must be an odd number, default is %d.\n"
+                        "-k\tAllows selection of the key positions used in the hash function.\n"
+                        "\tThe allowable choices range between 1-%d, inclusive. The positions\n"
+                        "\tare separated by commas, ranges may be used, and key positions may\n"
+                        "\toccur in any order. Also, the meta-character '*' causes the generated\n"
+                        "\thash function to consider ALL key positions, and $ indicates the\n"
+                        "\t``final character'' of a key, e.g., $,1,2,4,6-10.\n"
+                        "-K\tAllow use to select name of the keyword component in the keyword structure.\n"
+                        "-l\tCompare key lengths before trying a string comparison. This helps\n"
+                        "\tcut down on the number of string comparisons made during the lookup.\n"
+                        "-L\tGenerates code in the language specified by the option's argument. Languages\n"
+                        "\thandled are currently C++ and C. The default is C.\n"
+                        "-n\tDo not include the length of the keyword when computing the hash function\n"
+                        "-N\tAllow user to specify name of generated lookup function. Default\n"
+                        "\tname is `in_word_set.'\n"
+                        "-o\tReorders input keys by frequency of occurrence of the key sets.\n"
+                        "\tThis should decrease the search time dramatically.\n"
+                        "-O\tOptimize the generated lookup function by assuming that all input keywords \n"
+                        "\tare members of the keyset from the keyfile.\n"
+                        "-p\tChanges the return value of the generated function ``in_word_set''\n"
+                        "\tfrom its default boolean value (i.e., 0 or 1), to type ``pointer\n"
+                        "\tto wordlist array'' This is most useful when the -t option, allowing\n"
+                        "\tuser-defined structs, is used.\n"
+                        "-r\tUtilizes randomness to initialize the associated values table.\n"
+                        "-s\tAffects the size of the generated hash table. The numeric argument\n"
+                        "\tfor this option indicates ``how many times larger or smaller'' the associated\n"
+                        "\tvalue range should be, in relationship to the number of keys, e.g. a value of 3\n"
+                        "\tmeans ``allow the maximum associated value to be about 3 times larger than the\n"
+                        "\tnumber of input keys.'' Conversely, a value of -3 means ``make the maximum\n"
+                        "\tassociated value about 3 times smaller than the number of input keys.\n"
+                        "\tA larger table should decrease the time required for an unsuccessful search,\n"
+                        "\tat the expense of extra table space. Default value is 1.\n"
+                        "-S\tCauses the generated C code to use a switch statement scheme, rather\n"
+                        "\tthan an array lookup table. This can lead to a reduction in both\n"
+                        "\ttime and space requirements for some keyfiles. The argument to\n"
+                        "\tthis option determines how many switch statements are generated.\n"
+                        "\tA value of 1 generates 1 switch containing all the elements, a value of 2\n"
+                        "\tgenerates 2 tables with 1/2 the elements in each table, etc. This\n"
+                        "\tis useful since many C compilers cannot correctly generate code for\n"
+                        "\tlarge switch statements.\n"
+                        "-t\tAllows the user to include a structured type declaration for \n"
+                        "\tgenerated code. Any text before %%%% is consider part of the type\n"
+                        "\tdeclaration. Key words and additional fields may follow this, one\n"
+                        "\tgroup of fields per line.\n"
+                        "-T\tPrevents the transfer of the type declaration to the output file.\n"
+                        "\tUse this option if the type is already defined elsewhere.\n"
+                        "-v\tPrints out the current version number\n"
+                        "-Z\tAllow user to specify name of generated C++ class. Default\n"
+                        "\tname is `Perfect_Hash.'\n%e%a", DEFAULT_JUMP_VALUE, (MAX_KEY_POS - 1), usage, 1));
+            /* Never fall into the -H handler, where optarg is not valid. */
+            break;
+          }
+        case 'H': /* Sets the name for the hash function */
+          {
+            hash_name = getopt.optarg;
+            break;
+          }
+        case 'i': /* Sets the initial value for the associated values array. */
+          {
+            if ((initial_asso_value = atoi (getopt.optarg)) < 0)
+              {
+                ACE_ERROR ((LM_ERROR, "Initial value %d should be non-negative, ignoring and continuing.\n", initial_asso_value));
+                /* "Ignoring" means falling back to the default. */
+                initial_asso_value = 0;
+              }
+            if (option[RANDOM])
+              ACE_ERROR ((LM_ERROR, "warning, -r option superceeds -i, ignoring -i option and continuing\n"));
+            break;
+          }
+        case 'I': /* Use case insensitive string comparison. */
+          {
+            option_word |= STRCASECMP;
+            break;
+          }
+        case 'j': /* Sets the jump value, must be odd for later algorithms. */
+          {
+            if ((jump = atoi (getopt.optarg)) < 0)
+              ACE_ERROR ((LM_ERROR, "Jump value %d must be a positive number.\n%e%a", jump, usage, 1));
+            else if (jump && ACE_EVEN (jump))
+              {
+                ACE_ERROR ((LM_ERROR, "Jump value %d should be odd, adding 1 and continuing...\n", jump));
+                /* Kept out of the logging call on purpose. */
+                jump++;
+              }
+            break;
+          }
+        case 'k': /* Sets key positions used for hash function. */
+          {
+            const int BAD_VALUE = -1;
+            int value;
+            Iterator expand (getopt.optarg, 1, MAX_KEY_POS - 1, WORD_END, BAD_VALUE, EOS);
+
+            if (*getopt.optarg == '*') /* Use all the characters for hashing!!!! */
+              option_word = (option_word & ~DEFAULTCHARS) | ALLCHARS;
+            else
+              {
+                /* Last slot is reserved for the EOS terminator. */
+                char *const limit = key_positions + (MAX_KEY_POS - 1);
+                char *l_key_pos;
+
+                for (l_key_pos = key_positions; (value = expand ()) != EOS; l_key_pos++)
+                  if (l_key_pos >= limit)
+                    {
+                      ACE_ERROR ((LM_ERROR, "Too many key positions selected, at most %d are allowed.\n%e%a",
+                                  MAX_KEY_POS - 1, usage, 1));
+                      break;
+                    }
+                  else if (value == BAD_VALUE)
+                    ACE_ERROR ((LM_ERROR, "Illegal key value or range, use 1,2,3-%d,'$' or '*'.\n%e%a",
+                                MAX_KEY_POS - 1, usage, 1));
+                  else
+                    *l_key_pos = value;
+
+                *l_key_pos = EOS;
+
+                if (! (total_keysig_size = (l_key_pos - key_positions)))
+                  ACE_ERROR ((LM_ERROR, "No keys selected.\n%e%a", usage, 1));
+                else if (! key_sort (key_positions, total_keysig_size))
+                  ACE_ERROR ((LM_ERROR, "Duplicate keys selected\n%e%a", usage, 1));
+
+                /* After the descending sort the default selection
+                   "1,$" reads 1, WORD_END. */
+                if (total_keysig_size != 2
+                    || (key_positions[0] != 1 || key_positions[1] != WORD_END))
+                  option_word &= ~DEFAULTCHARS;
+              }
+            break;
+          }
+        case 'K': /* Make this the keyname for the keyword component field. */
+          {
+            key_name = getopt.optarg;
+            break;
+          }
+        case 'l': /* Create length table to avoid extra string compares. */
+          {
+            option_word |= LENTABLE;
+            break;
+          }
+        case 'L': /* Deal with different generated languages. */
+          {
+            option_word &= ~C;
+            if (!strcmp (getopt.optarg, "C++"))
+              option_word |= (CPLUSPLUS | ANSI);
+            else if (!strcmp (getopt.optarg, "C"))
+              option_word |= C;
+            else
+              {
+                ACE_ERROR ((LM_ERROR, "unsupported language option %s, defaulting to C\n", getopt.optarg));
+                option_word |= C;
+              }
+            break;
+          }
+        case 'n': /* Don't include the length when computing hash function. */
+          {
+            option_word |= NOLENGTH;
+            break;
+          }
+        case 'N': /* Make generated lookup function name be optarg */
+          {
+            function_name = getopt.optarg;
+            break;
+          }
+        case 'o': /* Order input by frequency of key set occurrence. */
+          {
+            option_word |= ORDER;
+            break;
+          }
+        case 'O': /* Assume all input keywords are in the keyset. */
+          {
+            option_word |= OPTIMIZE;
+            break;
+          }
+        case 'p': /* Generated lookup function now a pointer instead of int. */
+          {
+            option_word |= POINTER;
+            break;
+          }
+        case 'r': /* Utilize randomness to initialize the associated values table. */
+          {
+            option_word |= RANDOM;
+            if (option.initial_asso_value != 0)
+              ACE_ERROR ((LM_ERROR, "warning, -r option superceeds -i, disabling -i option and continuing\n"));
+            break;
+          }
+        case 's': /* Range of associated values, determines size of final table. */
+          {
+            if (abs (size = atoi (getopt.optarg)) > 50)
+              ACE_ERROR ((LM_ERROR, "%d is excessive, did you really mean this?! (type %n -h for help)\n", size));
+            break;
+          }
+        case 'S': /* Generate switch statement output, rather than lookup table. */
+          {
+            option_word |= SWITCH;
+            if ((option.total_switches = atoi (getopt.optarg)) <= 0)
+              ACE_ERROR ((LM_ERROR, "number of switches %s must be a positive number\n%e%a", getopt.optarg, usage, 1));
+            break;
+          }
+        case 't': /* Enable the TYPE mode, allowing arbitrary user structures. */
+          {
+            option_word |= TYPE;
+            break;
+          }
+        case 'T': /* Don't print structure definition. */
+          {
+            option_word |= NOTYPE;
+            break;
+          }
+        case 'v': /* Print out the version and quit. */
+          {
+            ACE_ERROR ((LM_ERROR, "%n: version %s\n%e\n%a", version_string, usage, 1));
+            /* Never fall into the -Z handler, where optarg is not valid. */
+            break;
+          }
+        case 'Z': /* Set the class name. */
+          {
+            class_name = getopt.optarg;
+            break;
+          }
+        default:
+          {
+            ACE_ERROR ((LM_ERROR, "%e%a", usage, 1));
+            break;
+          }
+        }
+
+    }
+
+  /* An optional keyword file replaces stdin. */
+  if (argv[getopt.optind] && ! freopen (argv[getopt.optind], "r", stdin))
+    ACE_ERROR ((LM_ERROR, "Cannot open keyword file %p\n%e%a", argv[getopt.optind], usage, 1));
+
+  /* Anything following the keyword file name is an error. */
+  if (++getopt.optind < argc)
+    ACE_ERROR ((LM_ERROR, "Extra trailing arguments to %n.\n%e%a", usage, 1));
+}
+
+/* Tests a flag: returns non-zero if OPTION is set in option_word, 0
+   otherwise.  The result is the masked bit itself, not necessarily 1.
+   Note that the parameter name hides the global `option' object
+   inside this function. */
+
+int
+Options::operator[] (Option_Type option)
+{
+  return option_word & option;
+}
+
+/* Enables option OPT by OR-ing its bit into option_word.  Unlike a
+   conventional assignment operator this returns nothing and leaves
+   all other flags untouched. */
+
+void
+Options::operator = (enum Option_Type opt)
+{
+  option_word |= opt;
+}
+
+/* Disables option OPT by clearing its bit in option_word.  Despite
+   the operator's spelling this is a mutation, not a comparison. */
+
+void
+Options::operator != (enum Option_Type opt)
+{
+  option_word &= ~opt;
+}
+
+/* Rewinds the key position cursor (key_pos) to the first entry of
+   key_positions. */
+
+void
+Options::reset (void)
+{
+  key_pos = 0;
+}
+
+/* Returns the key position under the cursor and then advances the
+   cursor by one.  No bounds check is made here: the caller is
+   expected to stop once it has received the EOS terminator. */
+
+int
+Options::get (void)
+{
+  return key_positions[key_pos++];
+}
+
+/* Stores R in `size', the member that also receives the argument of
+   the -s switch. */
+
+void
+Options::set_asso_max (int r)
+{
+  size = r;
+}
+
+/* Returns the current value of `size'. */
+
+int
+Options::get_asso_max (void)
+{
+  return size;
+}
+
+/* Returns total_keysig_size, the number of selected key positions. */
+
+int
+Options::get_max_keysig_size (void)
+{
+  return total_keysig_size;
+}
+
+/* Overwrites total_keysig_size with A_SIZE. */
+
+void
+Options::set_keysig_size (int a_size)
+{
+  total_keysig_size = a_size;
+}
+
+/* Returns the jump value (-j switch, default DEFAULT_JUMP_VALUE). */
+
+int
+Options::get_jump (void)
+{
+  return jump;
+}
+
+/* Returns the name of the generated lookup function (-N switch). */
+
+const char *
+Options::get_function_name (void)
+{
+  return function_name;
+}
+
+/* Returns the name of the keyword component field (-K switch). */
+
+const char *
+Options::get_key_name (void)
+{
+  return key_name;
+}
+
+/* Returns the name of the generated hash function (-H switch). */
+
+const char *
+Options::get_hash_name (void)
+{
+  return hash_name;
+}
+
+/* Returns the name of the generated C++ class (-Z switch). */
+
+const char *
+Options::get_class_name (void)
+{
+  return class_name;
+}
+
+/* Returns the initial associated character value (-i switch). */
+
+int
+Options::initial_value (void)
+{
+  return initial_asso_value;
+}
+
+/* Returns the iterations value (argument of the -f switch). */
+
+int
+Options::get_iterations (void)
+{
+  return iterations;
+}
+
+/* Returns the string whose characters delimit keywords from their
+   other attributes (-e switch). */
+
+const char *
+Options::get_delimiter ()
+{
+  return delimiters;
+}
+
+/* Returns the number of switch statements to generate (-S switch). */
+
+int
+Options::get_total_switches ()
+{
+  return total_switches;
+}
+
+
+
+
diff --git a/apps/gperf/src/Options.h b/apps/gperf/src/Options.h
new file mode 100644
index 00000000000..2d67003d991
--- /dev/null
+++ b/apps/gperf/src/Options.h
@@ -0,0 +1,140 @@
+/* -*- C++ -*- */
+// @(#)Options.h 1.1 10/18/96
+
+/* This may look like C code, but it is really -*- C++ -*- */
+
+/* Handles parsing the Options provided by the user.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+/* This module provides a uniform interface to the various options
+ available to a user of the gperf hash function generator. In
+ addition to the run-time options, found in the Option_Type below,
+ there is also the hash table Size and the Keys to be used in the
+ hashing. The overall design of this module was an experiment in
+ using C++ classes as a mechanism to enhance centralization of
+ option and error handling, which tend to get out of hand in a C
+ program. */
+
+#ifndef options_h
+#define options_h 1
+
+#include "ace/Log_Msg.h"
+
+/* Enumerates the run-time options.  Every enumerator is a distinct
+   single bit, written as an octal literal, so that any combination
+   can be OR-ed together into Options::option_word and tested with a
+   bitwise AND. */
+
+enum Option_Type
+{
+ DEBUG = 01, /* Enable debugging (prints diagnostics to stderr). */
+ ORDER = 02, /* Apply ordering heuristic to speed-up search time. */
+ ANSI = 04, /* Generate ANSI prototypes. */
+ ALLCHARS = 010, /* Use all characters in hash function. */
+ GNU = 020, /* Assume GNU extensions (primarily function inline). */
+ TYPE = 040, /* Handle user-defined type structured keyword input. */
+ RANDOM = 0100, /* Randomly initialize the associated values table. */
+ DEFAULTCHARS = 0200, /* Make default char positions be 1,$ (end of keyword). */
+ SWITCH = 0400, /* Generate switch output to save space. */
+ POINTER = 01000, /* Have in_word_set function return pointer, not boolean. */
+ NOLENGTH = 02000, /* Don't include keyword length in hash computations. */
+ LENTABLE = 04000, /* Generate a length table for string comparison. */
+ DUP = 010000, /* Handle duplicate hash values for keywords. */
+ FAST = 020000, /* Generate the hash function ``fast.'' */
+ NOTYPE = 040000, /* Don't include user-defined type definition in output -- it's already defined elsewhere. */
+ COMP = 0100000, /* Generate strncmp rather than strcmp. */
+ GLOBAL = 0200000, /* Make the keyword table a global variable. */
+ CONST = 0400000, /* Make the generated tables readonly (const). */
+ CPLUSPLUS = 01000000, /* Generate C++ code. */
+ C = 02000000, /* Generate C code. */
+ ENUM = 04000000, /* Use enum for constants. */
+ STRCASECMP = 010000000, /* Use the case insensitive comparison. */
+ OPTIMIZE = 020000000, /* Assume all input keywords are in the keyset. */
+ ADA = 040000000 /* Generate Ada code.  NOTE(review): the -L handling in Options.cpp only accepts "C" and "C++", so nothing there sets this flag. */
+};
+
+/* Define some useful constants (these don't really belong here, but I'm
+ not sure where else to put them!). These should be consts, but g++
+ doesn't seem to do the right thing with them at the moment... ;-(
+
+ All four values are stored in Options::key_positions, an array of
+ char, so they have to stay within the range of a char.  EOS shares
+ its value with MAX_KEY_POS; it cannot clash with a real key position
+ because the option parser only accepts positions up to
+ MAX_KEY_POS - 1. */
+
+enum
+{
+ MAX_KEY_POS = 128 - 1, /* Max size of each word's key set. */
+ WORD_START = 1, /* Signals the start of a word. */
+ WORD_END = 0, /* Signals the end of a word. */
+ EOS = MAX_KEY_POS /* Signals end of the key list. */
+};
+
+/* Class manager for gperf program Options.
+
+ All data members are static, so the state is shared program-wide no
+ matter how many Options objects exist; the program uses the single
+ global object `option' declared below this class. */
+
+class Options
+{
+public:
+ Options (void); /* Installs the default settings. */
+ ~Options (void); /* Dumps the settings to stderr if DEBUG is set. */
+ int operator[] (Option_Type option); /* Non-zero if OPTION is enabled. */
+ void operator() (int argc, char *argv[]); /* Parses the command line. */
+ void operator= (enum Option_Type); /* Enables the given option (sets its bit). */
+ void operator!= (enum Option_Type); /* Disables the given option (clears its bit). */
+ static void print_options (void); /* Prints the command line inside a C comment. */
+ static void set_asso_max (int r); /* Sets `size'. */
+ static int get_asso_max (void); /* Returns `size'. */
+ static void reset (void); /* Rewinds the key position cursor. */
+ static int get (void); /* Returns the current key position and advances the cursor. */
+ static int get_iterations (void); /* Returns `iterations'. */
+ static int get_max_keysig_size (void); /* Returns `total_keysig_size'. */
+ static void set_keysig_size (int); /* Sets `total_keysig_size'. */
+ static int get_jump (void); /* Returns `jump'. */
+ static int initial_value (void); /* Returns `initial_asso_value'. */
+ static int get_total_switches (void); /* Returns `total_switches'. */
+ static const char *get_function_name (void); /* Returns `function_name'. */
+ static const char *get_key_name (void); /* Returns `key_name'. */
+ static const char *get_class_name (void); /* Returns `class_name'. */
+ static const char *get_hash_name (void); /* Returns `hash_name'. */
+ static const char *get_delimiter (void); /* Returns `delimiters'. */
+
+private:
+ static int option_word; /* Holds the user-specified Options. */
+ static int total_switches; /* Number of switch statements to generate. */
+ static int total_keysig_size; /* Total number of distinct key_positions. */
+ static int size; /* Range of the hash table. */
+ static int key_pos; /* Tracks current key position for Iterator. */
+ static int jump; /* Jump length when trying alternative values. */
+ static int initial_asso_value; /* Initial value for asso_values table. */
+ static int argument_count; /* Records count of command-line arguments. */
+ static int iterations; /* Amount to iterate when a collision occurs. */
+ static char **argument_vector; /* Stores a pointer to command-line vector. */
+ static const char *function_name; /* Names used for generated lookup function. */
+ static const char *key_name; /* Name used for keyword key. */
+ static const char *class_name; /* Name used for generated C++ class. */
+ static const char *hash_name; /* Name used for generated hash function. */
+ static const char *delimiters; /* Separates keywords from other attributes. */
+ static char key_positions[MAX_KEY_POS]; /* Contains user-specified key choices, terminated by EOS. */
+ static int key_sort (char *base, int len); /* Sorts key positions in REVERSE order. */
+ static void usage (void); /* Prints proper program usage. */
+};
+
+/* Global option coordinator for the entire program. */
+extern Options option;
+
+/* Set to 1 if you want to stack-allocate some large arrays. */
+#ifndef LARGE_STACK_ARRAYS
+#define LARGE_STACK_ARRAYS 0
+#endif
+
+#endif
diff --git a/apps/gperf/src/Vectors.cpp b/apps/gperf/src/Vectors.cpp
new file mode 100644
index 00000000000..761e08b2672
--- /dev/null
+++ b/apps/gperf/src/Vectors.cpp
@@ -0,0 +1,33 @@
+/* This may look like C code, but it is really -*- C++ -*- */
+// @(#)Vectors.cpp 1.1 10/18/96
+
+
+/* Static class data members that are shared between several classes via
+ inheritance.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "Vectors.h"
+
+// Definitions of the static data members declared in struct Vectors
+// (see Vectors.h).  Neither has an explicit initializer, so as objects
+// of static storage duration both arrays start out zero-filled.
+
+// Counts occurrences of each key set character.
+int Vectors::occurrences[ALPHA_SIZE];
+
+// Value associated with each character.
+int Vectors::asso_values[ALPHA_SIZE];
diff --git a/apps/gperf/src/Vectors.h b/apps/gperf/src/Vectors.h
new file mode 100644
index 00000000000..c01e9f27d8f
--- /dev/null
+++ b/apps/gperf/src/Vectors.h
@@ -0,0 +1,44 @@
+/* -*- C++ -*- */
+// @(#)Vectors.h 1.1 10/18/96
+
+#include <stdio.h>
+
+/* This may look like C code, but it is really -*- C++ -*- */
+
+/* Static class data members that are shared between several classes via
+ inheritance.
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#ifndef vectors_h
+#define vectors_h 1
+
+// Number of entries in each of the per-character tables below.
+// NOTE(review): 128 presumably corresponds to the 7-bit ASCII range --
+// confirm against the code that indexes these tables.
+static const int ALPHA_SIZE = 128;
+
+// Holder for static tables that are shared between several classes via
+// inheritance.  Both members are only declared here; this header does
+// not define them.
+struct Vectors
+{
+ static int occurrences[ALPHA_SIZE];
+ // Counts occurrences of each key set character.
+
+ static int asso_values[ALPHA_SIZE];
+ // Value associated with each character.
+};
+
+#endif
diff --git a/apps/gperf/src/Version.cpp b/apps/gperf/src/Version.cpp
new file mode 100644
index 00000000000..8fb0d398887
--- /dev/null
+++ b/apps/gperf/src/Version.cpp
@@ -0,0 +1,25 @@
+/* Current program version number.
+// @(#)Version.cpp 1.1 10/18/96
+
+
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111,
+USA. */
+
+// Current program version, as a human-readable string.
+// NOTE(review): this points at a string literal, so it must be treated
+// as read-only even though the pointer type is not const-qualified.
+char *version_string = "2.6 (GNU C++ version)";
diff --git a/apps/gperf/src/gperf.cpp b/apps/gperf/src/gperf.cpp
new file mode 100644
index 00000000000..2e6aa2c6406
--- /dev/null
+++ b/apps/gperf/src/gperf.cpp
@@ -0,0 +1,66 @@
+/* Driver program for the Gen_Perf hash function generator Copyright
+// @(#)gperf.cpp 1.1 10/18/96
+
+ (C) 1989 Free Software Foundation, Inc. written by Douglas
+ C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+// Simple driver program for the Gen_Perf hash function generator.
+// All the hard work is done in class Gen_Perf and its class methods.
+
+#include "Options.h"
+#include "Gen_Perf.h"
+
+// Writes a C comment recording the current local wall-clock time to
+// standard output, e.g. "/* starting time is 9:05:03 */".  LABEL is the
+// word printed in front of "time is".  Nothing is printed when the
+// local time cannot be determined (localtime returned a null pointer).
+static void
+print_time_stamp (const char *label)
+{
+  time_t now;
+
+  time (&now);
+  struct tm *tm = localtime (&now);
+
+  if (tm != 0)
+    printf ("/* %s time is %d:%02d:%02d */\n", label, tm->tm_hour, tm->tm_min, tm->tm_sec);
+}
+
+// Program entry point.  Prints a starting time stamp, optionally lifts
+// the soft stack limit, hands the command line to the global `option'
+// object, runs the generator and prints an ending time stamp.
+// Returns whatever Gen_Perf::generate returned.
+int
+main (int argc, char *argv[])
+{
+  print_time_stamp ("starting");
+
+#if defined(RLIMIT_STACK) && LARGE_STACK_ARRAYS
+  /* Get rid of any avoidable limit on stack size. */
+  {
+    struct rlimit rlim;
+
+    /* Set the stack limit huge so that alloca does not fail.  Only
+       touch the limit when the current values could actually be
+       read; otherwise RLIM would be passed on uninitialized. */
+    if (getrlimit (RLIMIT_STACK, &rlim) == 0)
+      {
+        rlim.rlim_cur = rlim.rlim_max;
+        setrlimit (RLIMIT_STACK, &rlim);
+      }
+  }
+#endif /* RLIMIT_STACK && LARGE_STACK_ARRAYS */
+
+  /* Sets the Options. */
+  option (argc, argv);
+
+  // Initializes the key word list.
+  Gen_Perf table;
+
+  // Generates and prints the Gen_Perf hash table.  Don't use exit
+  // here, it skips the destructors.
+  int status = table.generate ();
+
+  print_time_stamp ("ending");
+  return status;
+}
diff --git a/apps/gperf/src/new.cpp b/apps/gperf/src/new.cpp
new file mode 100644
index 00000000000..ebaafa16917
--- /dev/null
+++ b/apps/gperf/src/new.cpp
@@ -0,0 +1,75 @@
+/* Defines a buffered memory allocation abstraction that reduces calls to
+// @(#)new.cpp 1.1 10/18/96
+
+ malloc.
+ Copyright (C) 1989 Free Software Foundation, Inc.
+ written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+
+This file is part of GNU GPERF.
+
+GNU GPERF is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 1, or (at your option)
+any later version.
+
+GNU GPERF is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU GPERF; see the file COPYING. If not, write to the Free
+Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA. */
+
+#include "Options.h"
+
+/* Determine default alignment.  ALIGNMENT is computed as the byte
+   offset of member D inside struct fooalign, i.e. the distance the
+   compiler puts between a leading char and a following double.
+   operator new below rounds every request up to a multiple of it.
+   If your C++ compiler does not like this expression then try
+   replacing the initializer with a plain constant such as 8. */
+struct fooalign {char x; double d;};
+const int ALIGNMENT = ((char *)&((struct fooalign *) 0)->d - (char *)0);
+
+/* Provide an abstraction that cuts down on the number of calls to
+   malloc by carving requests out of large buffered chunks.
+
+   SIZE is the number of bytes wanted.  It is rounded up to a multiple
+   of ALIGNMENT; a zero-byte request is treated as a one-byte request
+   so that every call yields a distinct address.  Returns a pointer to
+   the reserved bytes.  Chunks obtained here are never given back. */
+
+void *
+operator new (size_t size)
+{
+  static char *buf_start = 0;          /* Next unused byte of the current chunk. */
+  static char *buf_end = 0;            /* One past the last byte of the current chunk. */
+  static size_t buf_size = 4 * BUFSIZ; /* Size of the most recently requested chunk. */
+  char *temp;
+
+  /* Never hand out the same address twice for empty requests. */
+  if (size == 0)
+    size = 1;
+
+  /* Align this on correct boundaries, just to be safe... */
+  size = ((size + ALIGNMENT - 1) / ALIGNMENT) * ALIGNMENT;
+
+  /* If we are about to overflow our buffer we'll just grab another
+     chunk of memory.  Since we never free the original memory it
+     doesn't matter that no one points to the beginning of that
+     chunk.  Note we use a heuristic that grows the buffer either by
+     size of the request or by twice the previous size, whichever is
+     larger.  The remaining capacity is compared as a byte count so
+     that no arithmetic is performed on a null BUF_START. */
+
+  if (size >= (size_t) (buf_end - buf_start))
+    {
+      buf_size *= 2;
+      if (buf_size < size)
+        buf_size = size;
+
+      char *chunk = (char *) malloc (buf_size);
+
+      if (chunk == 0)
+        {
+          /* __LINE__ is an int, so it has to be formatted with %d.
+             NOTE(review): the trailing %a directive is expected to
+             terminate the program with status 1 -- confirm against
+             the ACE logging documentation.  The return below only
+             matters if control ever comes back. */
+          ACE_ERROR ((LM_ERROR, "Virtual memory failed at %s, %d in function %s\n%a", __FILE__, __LINE__, "operator new", 1));
+          return 0;
+        }
+
+      /* Switch to the new chunk only once it is known to exist, so
+         BUF_START and BUF_END always describe the same chunk. */
+      buf_start = chunk;
+      buf_end = chunk + buf_size;
+    }
+
+  temp = buf_start;
+  buf_start += size;
+  return temp;
+}
+
+/* We need this deletion operator in order to make the linker happy.
+   It is deliberately a no-op: PTR is ignored and no memory is ever
+   released. */
+
+void
+operator delete (void *ptr)
+{
+ // We cannot call free here, as it doesn't match the mallocs:
+ // operator new hands out pieces carved from larger malloc'ed chunks,
+ // so PTR is generally not an address that malloc itself returned.
+}
diff --git a/apps/gperf/tests/Makefile.in b/apps/gperf/tests/Makefile.in
new file mode 100644
index 00000000000..f702fc804f2
--- /dev/null
+++ b/apps/gperf/tests/Makefile.in
@@ -0,0 +1,72 @@
+# Copyright (C) 1989, 1992, 1993 Free Software Foundation, Inc.
+# written by Douglas C. Schmidt (schmidt@ics.uci.edu)
+#
+# This file is part of GNU GPERF.
+#
+# GNU GPERF is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 1, or (at your option)
+# any later version.
+#
+# GNU GPERF is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GNU GPERF; see the file COPYING. If not, write to the Free
+# Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+
+srcdir = .
+
+#### package, host, target, and site dependent Makefile fragments come in here.
+##
+
+GPERF = ../src/gperf
+
+check:
+ @echo "performing some tests of the perfect hash generator"
+ $(CC) -c $(CFLAGS) $(srcdir)/test.c
+ $(GPERF) -p -c -l -S1 -o $(srcdir)/c.gperf > cinset.c
+ $(CC) $(CFLAGS) -o cout cinset.c test.o
+ @echo "testing ANSI C reserved words, all items should be found in the set"
+ ./cout -v < $(srcdir)/c.gperf > c.out
+ -diff -b $(srcdir)/c.exp c.out
+ $(GPERF) -k1,4,'$$' $(srcdir)/ada.gperf > adainset.c
+# the doubled '$$' is only there because make would otherwise expand it; the program wants only one '$'
+ $(CC) $(CFLAGS) -o aout adainset.c test.o
+ @echo "testing Ada reserved words, all items should be found in the set"
+ ./aout -v < $(srcdir)/ada.gperf > ada-res.out
+ -diff -b $(srcdir)/ada-res.exp ada-res.out
+ $(GPERF) -p -D -k1,'$$' -s 2 -o $(srcdir)/adadefs.gperf > preinset.c
+ $(CC) $(CFLAGS) -o preout preinset.c test.o
+ @echo "testing Ada predefined words, all items should be found in the set"
+ ./preout -v < $(srcdir)/adadefs.gperf > ada-pred.out
+ -diff -b $(srcdir)/ada-pred.exp ada-pred.out
+ $(GPERF) -k1,2,'$$' -o $(srcdir)/modula3.gperf > m3inset.c
+ $(CC) $(CFLAGS) -o m3out m3inset.c test.o
+ @echo "testing Modula3 reserved words, all items should be found in the set"
+ ./m3out -v < $(srcdir)/modula3.gperf > modula.out
+ -diff -b $(srcdir)/modula.exp modula.out
+ $(GPERF) -o -S2 -p < $(srcdir)/pascal.gperf > pinset.c
+ $(CC) $(CFLAGS) -o pout pinset.c test.o
+ @echo "testing Pascal reserved words, all items should be found in the set"
+ ./pout -v < $(srcdir)/pascal.gperf > pascal.out
+ -diff -b $(srcdir)/pascal.exp pascal.out
+# these next 5 are demos that show off the generated code
+ $(GPERF) -p -j1 -g -o -t -N is_reserved_word -k1,3,'$$' < $(srcdir)/c-parse.gperf > test-1.out
+ -diff -b $(srcdir)/test-1.exp test-1.out
+ $(GPERF) -n -k1-8 -l <$(srcdir)/modula2.gperf > test-2.out
+ -diff -b $(srcdir)/test-2.exp test-2.out
+ $(GPERF) -p -j 1 -o -a -C -g -t -k1,4,$$ < $(srcdir)/gplus.gperf > test-3.out
+ -diff -b $(srcdir)/test-3.exp test-3.out
+ $(GPERF) -D -p -t < $(srcdir)/c-parse.gperf > test-4.out
+ -diff -b $(srcdir)/test-4.exp test-4.out
+ $(GPERF) -g -o -j1 -t -p -N is_reserved_word < $(srcdir)/gpc.gperf > test-5.out
+ -diff -b $(srcdir)/test-5.exp test-5.out
+# prints out the help message
+ -$(GPERF) -h > test-6.out 2>&1 || [ a = a ]
+ -diff -b $(srcdir)/test-6.exp test-6.out
+ @echo "only if, do, for, case, goto, else, while, and return should be found "
+ ./aout -v < $(srcdir)/c.gperf > test-7.out
+ -diff -b $(srcdir)/test-7.exp test-7.out
diff --git a/apps/gperf/tests/ada-pred.exp b/apps/gperf/tests/ada-pred.exp
new file mode 100644
index 00000000000..33caaa32ea1
--- /dev/null
+++ b/apps/gperf/tests/ada-pred.exp
@@ -0,0 +1,54 @@
+in word set boolean
+in word set character
+in word set constraint_error
+in word set false
+in word set float
+in word set integer
+in word set natural
+in word set numeric_error
+in word set positive
+in word set program_error
+in word set storage_error
+in word set string
+in word set tasking_error
+in word set true
+in word set address
+in word set aft
+in word set base
+in word set callable
+in word set constrained
+in word set count
+in word set delta
+in word set digits
+in word set emax
+in word set epsilon
+in word set first
+in word set firstbit
+in word set fore
+in word set image
+in word set large
+in word set last
+in word set lastbit
+in word set length
+in word set machine_emax
+in word set machine_emin
+in word set machine_mantissa
+in word set machine_overflows
+in word set machine_radix
+in word set machine_rounds
+in word set mantissa
+in word set pos
+in word set position
+in word set pred
+in word set range
+in word set safe_emax
+in word set safe_large
+in word set safe_small
+in word set size
+in word set small
+in word set storage_size
+in word set succ
+in word set terminated
+in word set val
+in word set value
+in word set width
diff --git a/apps/gperf/tests/ada-res.exp b/apps/gperf/tests/ada-res.exp
new file mode 100644
index 00000000000..8134fe861f5
--- /dev/null
+++ b/apps/gperf/tests/ada-res.exp
@@ -0,0 +1,63 @@
+in word set else
+in word set exit
+in word set terminate
+in word set type
+in word set raise
+in word set range
+in word set reverse
+in word set declare
+in word set end
+in word set record
+in word set exception
+in word set not
+in word set then
+in word set return
+in word set separate
+in word set select
+in word set digits
+in word set renames
+in word set subtype
+in word set elsif
+in word set function
+in word set for
+in word set package
+in word set procedure
+in word set private
+in word set while
+in word set when
+in word set new
+in word set entry
+in word set delay
+in word set case
+in word set constant
+in word set at
+in word set abort
+in word set accept
+in word set and
+in word set delta
+in word set access
+in word set abs
+in word set pragma
+in word set array
+in word set use
+in word set out
+in word set do
+in word set others
+in word set of
+in word set or
+in word set all
+in word set limited
+in word set loop
+in word set null
+in word set task
+in word set in
+in word set is
+in word set if
+in word set rem
+in word set mod
+in word set begin
+in word set body
+in word set xor
+in word set goto
+in word set generic
+in word set with
diff --git a/apps/gperf/tests/ada.gperf b/apps/gperf/tests/ada.gperf
new file mode 100644
index 00000000000..332bdc740ad
--- /dev/null
+++ b/apps/gperf/tests/ada.gperf
@@ -0,0 +1,63 @@
+else
+exit
+terminate
+type
+raise
+range
+reverse
+declare
+end
+record
+exception
+not
+then
+return
+separate
+select
+digits
+renames
+subtype
+elsif
+function
+for
+package
+procedure
+private
+while
+when
+new
+entry
+delay
+case
+constant
+at
+abort
+accept
+and
+delta
+access
+abs
+pragma
+array
+use
+out
+do
+others
+of
+or
+all
+limited
+loop
+null
+task
+in
+is
+if
+rem
+mod
+begin
+body
+xor
+goto
+generic
+with
diff --git a/apps/gperf/tests/adadefs.gperf b/apps/gperf/tests/adadefs.gperf
new file mode 100644
index 00000000000..875be69abc9
--- /dev/null
+++ b/apps/gperf/tests/adadefs.gperf
@@ -0,0 +1,54 @@
+boolean
+character
+constraint_error
+false
+float
+integer
+natural
+numeric_error
+positive
+program_error
+storage_error
+string
+tasking_error
+true
+address
+aft
+base
+callable
+constrained
+count
+delta
+digits
+emax
+epsilon
+first
+firstbit
+fore
+image
+large
+last
+lastbit
+length
+machine_emax
+machine_emin
+machine_mantissa
+machine_overflows
+machine_radix
+machine_rounds
+mantissa
+pos
+position
+pred
+range
+safe_emax
+safe_large
+safe_small
+size
+small
+storage_size
+succ
+terminated
+val
+value
+width
diff --git a/apps/gperf/tests/c++.gperf b/apps/gperf/tests/c++.gperf
new file mode 100644
index 00000000000..650d32d0edd
--- /dev/null
+++ b/apps/gperf/tests/c++.gperf
@@ -0,0 +1,47 @@
+asm
+auto
+break
+case
+catch
+char
+class
+const
+continue
+default
+delete
+do
+double
+else
+enum
+extern
+float
+for
+friend
+goto
+if
+inline
+int
+long
+new
+operator
+overload
+private
+protected
+public
+register
+return
+short
+signed
+sizeof
+static
+struct
+switch
+template
+this
+typedef
+union
+unsigned
+virtual
+void
+volatile
+while
diff --git a/apps/gperf/tests/c-parse.gperf b/apps/gperf/tests/c-parse.gperf
new file mode 100644
index 00000000000..feef59babb0
--- /dev/null
+++ b/apps/gperf/tests/c-parse.gperf
@@ -0,0 +1,56 @@
+%{
+/* Command-line: gperf -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf */
+%}
+struct resword { char *name; short token; enum rid rid; };
+%%
+__alignof, ALIGNOF, NORID
+__alignof__, ALIGNOF, NORID
+__asm, ASM, NORID
+__asm__, ASM, NORID
+__attribute, ATTRIBUTE, NORID
+__attribute__, ATTRIBUTE, NORID
+__const, TYPE_QUAL, RID_CONST
+__const__, TYPE_QUAL, RID_CONST
+__inline, SCSPEC, RID_INLINE
+__inline__, SCSPEC, RID_INLINE
+__signed, TYPESPEC, RID_SIGNED
+__signed__, TYPESPEC, RID_SIGNED
+__typeof, TYPEOF, NORID
+__typeof__, TYPEOF, NORID
+__volatile, TYPE_QUAL, RID_VOLATILE
+__volatile__, TYPE_QUAL, RID_VOLATILE
+asm, ASM, NORID
+auto, SCSPEC, RID_AUTO
+break, BREAK, NORID
+case, CASE, NORID
+char, TYPESPEC, RID_CHAR
+const, TYPE_QUAL, RID_CONST
+continue, CONTINUE, NORID
+default, DEFAULT, NORID
+do, DO, NORID
+double, TYPESPEC, RID_DOUBLE
+else, ELSE, NORID
+enum, ENUM, NORID
+extern, SCSPEC, RID_EXTERN
+float, TYPESPEC, RID_FLOAT
+for, FOR, NORID
+goto, GOTO, NORID
+if, IF, NORID
+inline, SCSPEC, RID_INLINE
+int, TYPESPEC, RID_INT
+long, TYPESPEC, RID_LONG
+register, SCSPEC, RID_REGISTER
+return, RETURN, NORID
+short, TYPESPEC, RID_SHORT
+signed, TYPESPEC, RID_SIGNED
+sizeof, SIZEOF, NORID
+static, SCSPEC, RID_STATIC
+struct, STRUCT, NORID
+switch, SWITCH, NORID
+typedef, SCSPEC, RID_TYPEDEF
+typeof, TYPEOF, NORID
+union, UNION, NORID
+unsigned, TYPESPEC, RID_UNSIGNED
+void, TYPESPEC, RID_VOID
+volatile, TYPE_QUAL, RID_VOLATILE
+while, WHILE, NORID
diff --git a/apps/gperf/tests/c.exp b/apps/gperf/tests/c.exp
new file mode 100644
index 00000000000..10c8b7f6116
--- /dev/null
+++ b/apps/gperf/tests/c.exp
@@ -0,0 +1,32 @@
+in word set if
+in word set do
+in word set int
+in word set for
+in word set case
+in word set char
+in word set auto
+in word set goto
+in word set else
+in word set long
+in word set void
+in word set enum
+in word set float
+in word set short
+in word set union
+in word set break
+in word set while
+in word set const
+in word set double
+in word set static
+in word set extern
+in word set struct
+in word set return
+in word set sizeof
+in word set switch
+in word set signed
+in word set typedef
+in word set default
+in word set unsigned
+in word set continue
+in word set register
+in word set volatile
diff --git a/apps/gperf/tests/c.gperf b/apps/gperf/tests/c.gperf
new file mode 100644
index 00000000000..8672d6c25ed
--- /dev/null
+++ b/apps/gperf/tests/c.gperf
@@ -0,0 +1,32 @@
+if
+do
+int
+for
+case
+char
+auto
+goto
+else
+long
+void
+enum
+float
+short
+union
+break
+while
+const
+double
+static
+extern
+struct
+return
+sizeof
+switch
+signed
+typedef
+default
+unsigned
+continue
+register
+volatile
diff --git a/apps/gperf/tests/configure.in b/apps/gperf/tests/configure.in
new file mode 100644
index 00000000000..d93c7bb1840
--- /dev/null
+++ b/apps/gperf/tests/configure.in
@@ -0,0 +1,26 @@
+# This file is a shell script fragment that supplies the information
+# necessary to tailor a template configure script into the configure
+# script appropriate for this directory. For more information, check
+# any existing configure script.
+
+configdirs=""
+srctrigger=c-parse.gperf
+srcname="test perfect hash function generator"
+
+target_makefile_frag=../../target-mkfrag
+package_makefile_frag=Make.pack
+
+# per-host:
+
+# per-target:
+
+TOLIBGXX=../../
+ALL='$(NOTHING)'
+CHECK=check
+MOSTLYCLEAN='*.o \#* core *inset.c output.* *.out aout cout m3out pout preout'
+
+(. ${srcdir}/../../config.shared) >${package_makefile_frag}
+
+# post-target:
+
+rm -f ${package_makefile_frag}
diff --git a/apps/gperf/tests/gpc.gperf b/apps/gperf/tests/gpc.gperf
new file mode 100644
index 00000000000..8fb469e46bc
--- /dev/null
+++ b/apps/gperf/tests/gpc.gperf
@@ -0,0 +1,48 @@
+%{
+/* ISO Pascal 7185 reserved words.
+ *
+ * For GNU Pascal compiler (GPC) by jtv@hut.fi
+ *
+ * run this through the Doug Schmidt's gperf program
+ * with command
+ * gperf -g -o -j1 -t -p -N is_reserved_word
+ *
+ */
+%}
+struct resword { char *name; short token; short iclass;};
+%%
+And, AND, PASCAL_ISO
+Array, ARRAY, PASCAL_ISO
+Begin, BEGIN_, PASCAL_ISO
+Case, CASE, PASCAL_ISO
+Const, CONST, PASCAL_ISO
+Div, DIV, PASCAL_ISO
+Do, DO, PASCAL_ISO
+Downto, DOWNTO, PASCAL_ISO
+Else, ELSE, PASCAL_ISO
+End, END, PASCAL_ISO
+File, FILE_, PASCAL_ISO
+For, FOR, PASCAL_ISO
+Function, FUNCTION, PASCAL_ISO
+Goto, GOTO, PASCAL_ISO
+If, IF, PASCAL_ISO
+In, IN, PASCAL_ISO
+Label, LABEL, PASCAL_ISO
+Mod, MOD, PASCAL_ISO
+Nil, NIL, PASCAL_ISO
+Not, NOT, PASCAL_ISO
+Of, OF, PASCAL_ISO
+Or, OR, PASCAL_ISO
+Packed, PACKED, PASCAL_ISO
+Procedure, PROCEDURE, PASCAL_ISO
+Program,PROGRAM,PASCAL_ISO
+Record, RECORD, PASCAL_ISO
+Repeat, REPEAT, PASCAL_ISO
+Set, SET, PASCAL_ISO
+Then, THEN, PASCAL_ISO
+To, TO, PASCAL_ISO
+Type, TYPE, PASCAL_ISO
+Until, UNTIL, PASCAL_ISO
+Var, VAR, PASCAL_ISO
+While, WHILE, PASCAL_ISO
+With, WITH, PASCAL_ISO
diff --git a/apps/gperf/tests/gplus.gperf b/apps/gperf/tests/gplus.gperf
new file mode 100644
index 00000000000..4a93315be52
--- /dev/null
+++ b/apps/gperf/tests/gplus.gperf
@@ -0,0 +1,76 @@
+%{
+/* Command-line: gperf -p -j1 -g -o -t -N is_reserved_word -k1,4,$ gplus.gperf */
+%}
+struct resword { char *name; short token; enum rid rid;};
+%%
+__alignof, ALIGNOF, NORID
+__alignof__, ALIGNOF, NORID
+__asm, ASM, NORID
+__asm__, ASM, NORID
+__attribute, ATTRIBUTE, NORID
+__attribute__, ATTRIBUTE, NORID
+__const, TYPE_QUAL, RID_CONST
+__const__, TYPE_QUAL, RID_CONST
+__inline, SCSPEC, RID_INLINE
+__inline__, SCSPEC, RID_INLINE
+__signed, TYPESPEC, RID_SIGNED
+__signed__, TYPESPEC, RID_SIGNED
+__typeof, TYPEOF, NORID
+__typeof__, TYPEOF, NORID
+__volatile, TYPE_QUAL, RID_VOLATILE
+__volatile__, TYPE_QUAL, RID_VOLATILE
+all, ALL, NORID /* Extension */,
+except, EXCEPT, NORID /* Extension */,
+exception, AGGR, RID_EXCEPTION /* Extension */,
+raise, RAISE, NORID /* Extension */,
+raises, RAISES, NORID /* Extension */,
+reraise, RERAISE, NORID /* Extension */,
+try, TRY, NORID /* Extension */,
+asm, ASM, NORID,
+auto, SCSPEC, RID_AUTO,
+break, BREAK, NORID,
+case, CASE, NORID,
+catch, CATCH, NORID,
+char, TYPESPEC, RID_CHAR,
+class, AGGR, RID_CLASS,
+const, TYPE_QUAL, RID_CONST,
+continue, CONTINUE, NORID,
+default, DEFAULT, NORID,
+delete, DELETE, NORID,
+do, DO, NORID,
+double, TYPESPEC, RID_DOUBLE,
+dynamic, DYNAMIC, NORID,
+else, ELSE, NORID,
+enum, ENUM, NORID,
+extern, SCSPEC, RID_EXTERN,
+float, TYPESPEC, RID_FLOAT,
+for, FOR, NORID,
+friend, SCSPEC, RID_FRIEND,
+goto, GOTO, NORID,
+if, IF, NORID,
+inline, SCSPEC, RID_INLINE,
+int, TYPESPEC, RID_INT,
+long, TYPESPEC, RID_LONG,
+new, NEW, NORID,
+operator, OPERATOR, NORID,
+overload, OVERLOAD, NORID,
+private, PRIVATE, NORID,
+protected, PROTECTED, NORID,
+public, PUBLIC, NORID,
+register, SCSPEC, RID_REGISTER,
+return, RETURN, NORID,
+short, TYPESPEC, RID_SHORT,
+signed, TYPESPEC, RID_SIGNED,
+sizeof, SIZEOF, NORID,
+static, SCSPEC, RID_STATIC,
+struct, AGGR, RID_RECORD,
+switch, SWITCH, NORID,
+this, THIS, NORID,
+typedef, SCSPEC, RID_TYPEDEF,
+typeof, TYPEOF, NORID,
+union, AGGR, RID_UNION,
+unsigned, TYPESPEC, RID_UNSIGNED,
+virtual, SCSPEC, RID_VIRTUAL,
+void, TYPESPEC, RID_VOID,
+volatile, TYPE_QUAL, RID_VOLATILE,
+while, WHILE, NORID,
diff --git a/apps/gperf/tests/irc.gperf b/apps/gperf/tests/irc.gperf
new file mode 100644
index 00000000000..afe53c59e7d
--- /dev/null
+++ b/apps/gperf/tests/irc.gperf
@@ -0,0 +1,63 @@
+%{
+extern int m_text(), m_private(), m_who(), m_whois(), m_user(), m_list();
+extern int m_topic(), m_invite(), m_channel(), m_version(), m_quit();
+extern int m_server(), m_kill(), m_info(), m_links(), m_summon(), m_stats();
+extern int m_users(), m_nick(), m_error(), m_help(), m_whoreply();
+extern int m_squit(), m_restart(), m_away(), m_die(), m_connect();
+extern int m_ping(), m_pong(), m_oper(), m_pass(), m_wall(), m_trace();
+extern int m_time(), m_rehash(), m_names(), m_namreply(), m_admin();
+extern int m_linreply(), m_notice(), m_lusers(), m_voice(), m_grph();
+extern int m_xtra(), m_motd();
+%}
+struct Message {
+ char *cmd;
+ int (* func)();
+ int count;
+ int parameters;
+};
+%%
+NICK, m_nick, 0, 1
+MSG, m_text, 0, 1
+PRIVMSG, m_private, 0, 2
+WHO, m_who, 0, 1
+WHOIS, m_whois, 0, 4
+USER, m_user, 0, 4
+SERVER, m_server, 0, 2
+LIST, m_list, 0, 1
+TOPIC, m_topic, 0, 1
+INVITE, m_invite, 0, 2
+CHANNEL, m_channel, 0, 1
+VERSION, m_version, 0, 1
+QUIT, m_quit, 0, 2
+SQUIT, m_squit, 0, 2
+KILL, m_kill, 0, 2
+INFO, m_info, 0, 1
+LINKS, m_links, 0, 1
+SUMMON, m_summon, 0, 1
+STATS, m_stats, 0, 1
+USERS, m_users, 0, 1
+RESTART, m_restart, 0, 1
+WHOREPLY,m_whoreply, 0, 7
+HELP, m_help, 0, 2
+ERROR, m_error, 0, 1
+AWAY, m_away, 0, 1
+DIE, m_die, 0, 1
+CONNECT, m_connect, 0, 3
+PING, m_ping, 0, 2
+PONG, m_pong, 0, 3
+OPER, m_oper, 0, 3
+PASS, m_pass, 0, 2
+WALL, m_wall, 0, 1
+TIME, m_time, 0, 1
+REHASH, m_rehash, 0, 1
+NAMES, m_names, 0, 1
+NAMREPLY,m_namreply, 0, 3
+ADMIN, m_admin, 0, 1
+TRACE, m_trace, 0, 1
+LINREPLY,m_linreply, 0, 2
+NOTICE, m_notice, 0, 2
+LUSERS, m_lusers, 0, 1
+VOICE, m_voice, 0, 2
+GRPH, m_grph, 0, 2
+XTRA, m_xtra, 0, 2
+MOTD, m_motd, 0, 2
diff --git a/apps/gperf/tests/makeinfo.gperf b/apps/gperf/tests/makeinfo.gperf
new file mode 100644
index 00000000000..1488b8e38fb
--- /dev/null
+++ b/apps/gperf/tests/makeinfo.gperf
@@ -0,0 +1,116 @@
+COMMAND;
+%%
+!, cm_force_sentence_end, false
+', insert_self, false
+*, cm_asterisk, false
+., cm_force_sentence_end, false
+:, cm_force_abbreviated_whitespace, false
+?, cm_force_sentence_end, false
+@, insert_self, false
+TeX, cm_TeX, true
+`, insert_self, false
+appendix, cm_appendix, false
+appendixsec, cm_appendixsec, false
+appendixsubsec, cm_appendixsubsec, false
+asis, cm_asis, true
+b, cm_bold, true
+br, cm_br, false
+bullet, cm_bullet, true
+bye, cm_bye, false
+c, cm_comment, false
+center, cm_center, false
+chapter, cm_chapter, false
+cindex, cm_cindex, false
+cite, cm_cite, true
+code, cm_code, true
+comment, cm_comment, false
+contents, do_nothing, false
+copyright, cm_copyright, true
+ctrl, cm_ctrl, true
+defcodeindex, cm_defindex, false
+defindex, cm_defindex, false
+dfn, cm_dfn, true
+display, cm_display, false
+dots, cm_dots, true
+emph, cm_emph, true
+end, cm_end, false
+enumerate, cm_enumerate, false
+equiv, cm_equiv, true
+error, cm_error, true
+example, cm_example, false
+exdent, cm_exdent, false
+expansion, cm_expansion, true
+file, cm_file, true
+findex, cm_findex, false
+format, cm_format, false
+group, cm_group, false
+i, cm_italic, true
+iappendix, cm_appendix, false
+iappendixsec, cm_appendixsec, false
+iappendixsubsec, cm_appendixsubsec, false
+ichapter, cm_chapter, false
+ifinfo, cm_ifinfo, false
+iftex, cm_iftex, false
+ignore, cm_ignore, false
+include, cm_include, false
+inforef, cm_inforef, true
+input, cm_include, false
+isection, cm_section, false
+isubsection, cm_subsection, false
+isubsubsection, cm_subsubsection, false
+item, cm_item, false
+itemize, cm_itemize, false
+itemx, cm_itemx, false
+iunnumbered, cm_unnumbered, false
+iunnumberedsec, cm_unnumberedsec, false
+iunnumberedsubsec, cm_unnumberedsubsec, false
+kbd, cm_kbd, true
+key, cm_key, true
+kindex, cm_kindex, false
+lisp, cm_lisp, false
+menu, cm_menu
+minus, cm_minus, true
+need, cm_need, false
+node, cm_node, false
+noindent, cm_noindent, false
+page, do_nothing, false
+pindex, cm_pindex, false
+point, cm_point, true
+print, cm_print, true
+printindex, cm_printindex, false
+pxref, cm_pxref, true
+quotation, cm_quotation, false
+r, cm_roman, true
+ref, cm_xref, true
+refill, cm_refill, false
+result, cm_result, true
+samp, cm_samp, true
+sc, cm_sc, true
+section, cm_section, false
+setchapternewpage, cm_setchapternewpage, false
+setfilename, cm_setfilename, false
+settitle, cm_settitle, false
+smallexample, cm_smallexample, false
+sp, cm_sp, false
+strong, cm_strong, true
+subsection, cm_subsection, false
+subsubsection, cm_subsubsection, false
+summarycontents, do_nothing, false
+syncodeindex, cm_synindex, false
+synindex, cm_synindex, false
+t, cm_title, true
+table, cm_table, false
+tex, cm_tex, false
+tindex, cm_tindex, false
+titlepage, cm_titlepage, false
+unnumbered, cm_unnumbered, false
+unnumberedsec, cm_unnumberedsec, false
+unnumberedsubsec, cm_unnumberedsubsec, false
+var, cm_var, true
+vindex, cm_vindex, false
+w, cm_w, true
+xref, cm_xref, true
+{, insert_self, false
+}, insert_self, false
+infoinclude, cm_infoinclude, false
+footnote, cm_footnote, false
diff --git a/apps/gperf/tests/modula.exp b/apps/gperf/tests/modula.exp
new file mode 100644
index 00000000000..cef7d5acad8
--- /dev/null
+++ b/apps/gperf/tests/modula.exp
@@ -0,0 +1,106 @@
+in word set AND
+in word set ARRAY
+in word set BEGIN
+in word set BITS
+in word set BY
+in word set CASE
+in word set CONST
+in word set DIV
+in word set DO
+in word set ELSE
+in word set ELSIF
+in word set END
+in word set EVAL
+in word set EXCEPT
+in word set EXCEPTION
+in word set EXIT
+in word set EXPORTS
+in word set FINALLY
+in word set FOR
+in word set FROM
+in word set IF
+in word set IMPORT
+in word set INTERFACE
+in word set IN
+in word set INLINE
+in word set LOCK
+in word set METHODS
+in word set MOD
+in word set MODULE
+in word set NOT
+in word set OBJECT
+in word set OF
+in word set OR
+in word set PROCEDURE
+in word set RAISES
+in word set READONLY
+in word set RECORD
+in word set REF
+in word set REPEAT
+in word set RETURN
+in word set SET
+in word set THEN
+in word set TO
+in word set TRY
+in word set TYPE
+in word set TYPECASE
+in word set UNSAFE
+in word set UNTIL
+in word set UNTRACED
+in word set VALUE
+in word set VAR
+in word set WHILE
+in word set WITH
+in word set and
+in word set array
+in word set begin
+in word set bits
+in word set by
+in word set case
+in word set const
+in word set div
+in word set do
+in word set else
+in word set elsif
+in word set end
+in word set eval
+in word set except
+in word set exception
+in word set exit
+in word set exports
+in word set finally
+in word set for
+in word set from
+in word set if
+in word set import
+in word set interface
+in word set in
+in word set inline
+in word set lock
+in word set methods
+in word set mod
+in word set module
+in word set not
+in word set object
+in word set of
+in word set or
+in word set procedure
+in word set raises
+in word set readonly
+in word set record
+in word set ref
+in word set repeat
+in word set return
+in word set set
+in word set then
+in word set to
+in word set try
+in word set type
+in word set typecase
+in word set unsafe
+in word set until
+in word set untraced
+in word set value
+in word set var
+in word set while
+in word set with
diff --git a/apps/gperf/tests/modula2.gperf b/apps/gperf/tests/modula2.gperf
new file mode 100644
index 00000000000..5ef9c753835
--- /dev/null
+++ b/apps/gperf/tests/modula2.gperf
@@ -0,0 +1,40 @@
+AND
+ARRAY
+BEGIN
+BY
+CASE
+CONST
+DEFINITION
+DIV
+DO
+ELSE
+ELSIF
+END
+EXIT
+EXPORT
+FOR
+FROM
+IF
+IMPLEMENTATION
+IMPORT
+IN
+LOOP
+MOD
+MODULE
+NOT
+OF
+OR
+POINTER
+PROCEDURE
+QUALIFIED
+RECORD
+REPEAT
+RETURN
+SET
+THEN
+TO
+TYPE
+UNTIL
+VAR
+WHILE
+WITH
diff --git a/apps/gperf/tests/modula3.gperf b/apps/gperf/tests/modula3.gperf
new file mode 100644
index 00000000000..d0243460d9b
--- /dev/null
+++ b/apps/gperf/tests/modula3.gperf
@@ -0,0 +1,106 @@
+AND
+ARRAY
+BEGIN
+BITS
+BY
+CASE
+CONST
+DIV
+DO
+ELSE
+ELSIF
+END
+EVAL
+EXCEPT
+EXCEPTION
+EXIT
+EXPORTS
+FINALLY
+FOR
+FROM
+IF
+IMPORT
+INTERFACE
+IN
+INLINE
+LOCK
+METHODS
+MOD
+MODULE
+NOT
+OBJECT
+OF
+OR
+PROCEDURE
+RAISES
+READONLY
+RECORD
+REF
+REPEAT
+RETURN
+SET
+THEN
+TO
+TRY
+TYPE
+TYPECASE
+UNSAFE
+UNTIL
+UNTRACED
+VALUE
+VAR
+WHILE
+WITH
+and
+array
+begin
+bits
+by
+case
+const
+div
+do
+else
+elsif
+end
+eval
+except
+exception
+exit
+exports
+finally
+for
+from
+if
+import
+interface
+in
+inline
+lock
+methods
+mod
+module
+not
+object
+of
+or
+procedure
+raises
+readonly
+record
+ref
+repeat
+return
+set
+then
+to
+try
+type
+typecase
+unsafe
+until
+untraced
+value
+var
+while
+with
diff --git a/apps/gperf/tests/pascal.exp b/apps/gperf/tests/pascal.exp
new file mode 100644
index 00000000000..765e44c6a0f
--- /dev/null
+++ b/apps/gperf/tests/pascal.exp
@@ -0,0 +1,36 @@
+in word set with
+in word set array
+in word set and
+in word set function
+in word set case
+in word set var
+in word set const
+in word set until
+in word set then
+in word set set
+in word set record
+in word set program
+in word set procedure
+in word set or
+in word set packed
+in word set not
+in word set nil
+in word set label
+in word set in
+in word set repeat
+in word set of
+in word set goto
+in word set forward
+in word set for
+in word set while
+in word set file
+in word set else
+in word set downto
+in word set do
+in word set div
+in word set to
+in word set type
+in word set end
+in word set mod
+in word set begin
+in word set if
diff --git a/apps/gperf/tests/pascal.gperf b/apps/gperf/tests/pascal.gperf
new file mode 100644
index 00000000000..fed3fbb30ea
--- /dev/null
+++ b/apps/gperf/tests/pascal.gperf
@@ -0,0 +1,36 @@
+with
+array
+and
+function
+case
+var
+const
+until
+then
+set
+record
+program
+procedure
+or
+packed
+not
+nil
+label
+in
+repeat
+of
+goto
+forward
+for
+while
+file
+else
+downto
+do
+div
+to
+type
+end
+mod
+begin
+if
diff --git a/apps/gperf/tests/test-1.exp b/apps/gperf/tests/test-1.exp
new file mode 100644
index 00000000000..5788cf7dfc3
--- /dev/null
+++ b/apps/gperf/tests/test-1.exp
@@ -0,0 +1,140 @@
+/* C code produced by gperf version 2.5 (GNU C++ version) */
+/* Command-line: ../src/gperf -p -j1 -g -o -t -N is_reserved_word -k1,3,$ */
+/* Command-line: gperf -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf */
+struct resword { char *name; short token; enum rid rid; };
+
+#define TOTAL_KEYWORDS 51
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 13
+#define MIN_HASH_VALUE 8
+#define MAX_HASH_VALUE 82
+/* maximum key range = 75, duplicates = 0 */
+
+#ifdef __GNUC__
+inline
+#endif
+static unsigned int
+hash (str, len)
+ register char *str;
+ register int unsigned len;
+{
+ static unsigned char asso_values[] =
+ {
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 0, 83, 1, 2, 34,
+ 19, 6, 11, 29, 0, 17, 83, 0, 23, 28,
+ 26, 30, 31, 83, 15, 1, 0, 28, 13, 4,
+ 83, 83, 5, 83, 83, 83, 83, 83,
+ };
+ register int hval = len;
+
+ switch (hval)
+ {
+ default:
+ case 3:
+ hval += asso_values[str[2]];
+ case 2:
+ case 1:
+ hval += asso_values[str[0]];
+ break;
+ }
+ return hval + asso_values[str[len - 1]];
+}
+
+#ifdef __GNUC__
+inline
+#endif
+struct resword *
+is_reserved_word (str, len)
+ register char *str;
+ register unsigned int len;
+{
+ static struct resword wordlist[] =
+ {
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"__asm__", ASM, NORID},
+ {"",},
+ {"__typeof__", TYPEOF, NORID},
+ {"__signed__", TYPESPEC, RID_SIGNED},
+ {"__alignof__", ALIGNOF, NORID},
+ {"break", BREAK, NORID},
+ {"__attribute__", ATTRIBUTE, NORID},
+ {"",}, {"",},
+ {"else", ELSE, NORID},
+ {"__attribute", ATTRIBUTE, NORID},
+ {"__typeof", TYPEOF, NORID},
+ {"int", TYPESPEC, RID_INT},
+ {"__alignof", ALIGNOF, NORID},
+ {"struct", STRUCT, NORID},
+ {"sizeof", SIZEOF, NORID},
+ {"switch", SWITCH, NORID},
+ {"__volatile__", TYPE_QUAL, RID_VOLATILE},
+ {"",},
+ {"__inline__", SCSPEC, RID_INLINE},
+ {"__signed", TYPESPEC, RID_SIGNED},
+ {"__volatile", TYPE_QUAL, RID_VOLATILE},
+ {"if", IF, NORID},
+ {"__inline", SCSPEC, RID_INLINE},
+ {"while", WHILE, NORID},
+ {"",},
+ {"__asm", ASM, NORID},
+ {"auto", SCSPEC, RID_AUTO},
+ {"short", TYPESPEC, RID_SHORT},
+ {"default", DEFAULT, NORID},
+ {"extern", SCSPEC, RID_EXTERN},
+ {"",}, {"",},
+ {"__const", TYPE_QUAL, RID_CONST},
+ {"static", SCSPEC, RID_STATIC},
+ {"__const__", TYPE_QUAL, RID_CONST},
+ {"for", FOR, NORID},
+ {"case", CASE, NORID},
+ {"float", TYPESPEC, RID_FLOAT},
+ {"return", RETURN, NORID},
+ {"typeof", TYPEOF, NORID},
+ {"typedef", SCSPEC, RID_TYPEDEF},
+ {"volatile", TYPE_QUAL, RID_VOLATILE},
+ {"do", DO, NORID},
+ {"inline", SCSPEC, RID_INLINE},
+ {"void", TYPESPEC, RID_VOID},
+ {"char", TYPESPEC, RID_CHAR},
+ {"signed", TYPESPEC, RID_SIGNED},
+ {"unsigned", TYPESPEC, RID_UNSIGNED},
+ {"",}, {"",},
+ {"double", TYPESPEC, RID_DOUBLE},
+ {"asm", ASM, NORID},
+ {"",}, {"",},
+ {"goto", GOTO, NORID},
+ {"",},
+ {"const", TYPE_QUAL, RID_CONST},
+ {"enum", ENUM, NORID},
+ {"register", SCSPEC, RID_REGISTER},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"continue", CONTINUE, NORID},
+ {"",},
+ {"union", UNION, NORID},
+ {"",}, {"",}, {"",}, {"",}, {"",},
+ {"long", TYPESPEC, RID_LONG},
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register char *s = wordlist[key].name;
+
+ if (*s == *str && !strcmp (str + 1, s + 1))
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/apps/gperf/tests/test-2.exp b/apps/gperf/tests/test-2.exp
new file mode 100644
index 00000000000..f74124155eb
--- /dev/null
+++ b/apps/gperf/tests/test-2.exp
@@ -0,0 +1,183 @@
+/* C code produced by gperf version 2.5 (GNU C++ version) */
+/* Command-line: ../src/gperf -n -k1-8 -l */
+
+#define TOTAL_KEYWORDS 40
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 14
+#define MIN_HASH_VALUE 1
+#define MAX_HASH_VALUE 256
+/* maximum key range = 256, duplicates = 0 */
+
+static unsigned int
+hash (str, len)
+ register char *str;
+ register int unsigned len;
+{
+ static unsigned short asso_values[] =
+ {
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 25, 30, 35, 21, 0,
+ 30, 15, 30, 45, 257, 257, 0, 5, 45, 0,
+ 10, 0, 1, 20, 25, 15, 30, 40, 15, 5,
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 257, 257, 257, 257, 257,
+ 257, 257, 257, 257, 257, 257, 257, 257,
+ };
+ register int hval = 0;
+
+ switch (len)
+ {
+ default:
+ case 8:
+ hval += asso_values[str[7]];
+ case 7:
+ hval += asso_values[str[6]];
+ case 6:
+ hval += asso_values[str[5]];
+ case 5:
+ hval += asso_values[str[4]];
+ case 4:
+ hval += asso_values[str[3]];
+ case 3:
+ hval += asso_values[str[2]];
+ case 2:
+ hval += asso_values[str[1]];
+ case 1:
+ hval += asso_values[str[0]];
+ break;
+ }
+ return hval;
+}
+
+char *
+in_word_set (str, len)
+ register char *str;
+ register unsigned int len;
+{
+
+ static unsigned char lengthtable[] =
+ {
+ 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 4, 2, 0, 0, 0, 2, 3, 0,
+ 0, 0, 2, 3, 0, 0, 0, 2, 4, 0, 0, 0, 4, 6,
+ 0, 0, 0, 3, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0,
+ 3, 5, 6, 0, 0, 6, 0, 0, 0, 0, 3, 0, 0, 0,
+ 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, 4, 0, 0, 9,
+ 0, 4, 6, 6, 0, 0, 2, 3, 0, 0, 0, 5, 3, 0,
+ 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 0,
+ 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
+ 7, 0, 0, 0, 5, 0, 0, 0, 0, 5, 0, 0, 0, 0,
+ 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 9, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 10,
+ };
+ static char *wordlist[] =
+ {
+ "",
+ "OR",
+ "", "", "", "", "", "", "", "",
+ "LOOP",
+ "", "", "", "", "", "", "", "", "",
+ "ELSE",
+ "DO",
+ "", "", "",
+ "TO",
+ "MOD",
+ "", "", "",
+ "OF",
+ "FOR",
+ "", "", "",
+ "BY",
+ "FROM",
+ "", "", "",
+ "TYPE",
+ "MODULE",
+ "", "", "",
+ "SET",
+ "", "", "", "", "",
+ "EXPORT",
+ "", "", "", "",
+ "VAR",
+ "ARRAY",
+ "RECORD",
+ "", "",
+ "REPEAT",
+ "", "", "", "",
+ "END",
+ "", "", "",
+ "NOT",
+ "", "", "", "",
+ "IF",
+ "", "", "", "",
+ "CASE",
+ "", "",
+ "PROCEDURE",
+ "",
+ "EXIT",
+ "IMPORT",
+ "RETURN",
+ "", "",
+ "IN",
+ "AND",
+ "", "", "",
+ "ELSIF",
+ "DIV",
+ "", "", "",
+ "THEN",
+ "", "", "", "", "", "", "", "", "",
+ "IMPLEMENTATION",
+ "", "", "", "",
+ "WHILE",
+ "", "", "", "", "", "", "", "", "",
+ "CONST",
+ "POINTER",
+ "", "", "",
+ "UNTIL",
+ "", "", "", "",
+ "BEGIN",
+ "", "", "", "",
+ "WITH",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "QUALIFIED",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "", "", "", "", "",
+ "", "", "", "", "",
+ "DEFINITION",
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register char *s = wordlist[key];
+
+ if (len == lengthtable[key]
+ && *s == *str && !strcmp (str + 1, s + 1))
+ return s;
+ }
+ }
+ return 0;
+}
diff --git a/apps/gperf/tests/test-3.exp b/apps/gperf/tests/test-3.exp
new file mode 100644
index 00000000000..5e889020657
--- /dev/null
+++ b/apps/gperf/tests/test-3.exp
@@ -0,0 +1,169 @@
+/* C code produced by gperf version 2.5 (GNU C++ version) */
+/* Command-line: ../src/gperf -p -j 1 -o -a -C -g -t -k1,4,$ */
+/* Command-line: gperf -p -j1 -g -o -t -N is_reserved_word -k1,4,$ gplus.gperf */
+struct resword { char *name; short token; enum rid rid;};
+
+#define TOTAL_KEYWORDS 71
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 13
+#define MIN_HASH_VALUE 4
+#define MAX_HASH_VALUE 147
+/* maximum key range = 144, duplicates = 0 */
+
+#ifdef __GNUC__
+inline
+#endif
+static unsigned int
+hash (register const char *str, register int len)
+{
+ static const unsigned char asso_values[] =
+ {
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 148, 148, 148, 148, 148,
+ 148, 148, 148, 148, 148, 0, 148, 19, 6, 27,
+ 37, 0, 12, 1, 15, 63, 148, 4, 0, 56,
+ 20, 15, 42, 148, 31, 5, 26, 39, 32, 10,
+ 148, 40, 148, 148, 148, 148, 148, 148,
+ };
+ register int hval = len;
+
+ switch (hval)
+ {
+ default:
+ case 4:
+ hval += asso_values[str[3]];
+ case 3:
+ case 2:
+ case 1:
+ hval += asso_values[str[0]];
+ break;
+ }
+ return hval + asso_values[str[len - 1]];
+}
+
+#ifdef __GNUC__
+inline
+#endif
+const struct resword *
+in_word_set (register const char *str, register int len)
+{
+ static const struct resword wordlist[] =
+ {
+ {"",}, {"",}, {"",}, {"",},
+ {"else", ELSE, NORID,},
+ {"",},
+ {"long", TYPESPEC, RID_LONG,},
+ {"",}, {"",}, {"",}, {"",},
+ {"__alignof__", ALIGNOF, NORID},
+ {"__asm__", ASM, NORID},
+ {"",}, {"",},
+ {"while", WHILE, NORID,},
+ {"",}, {"",}, {"",}, {"",}, {"",},
+ {"__alignof", ALIGNOF, NORID},
+ {"all", ALL, NORID /* Extension */,},
+ {"sizeof", SIZEOF, NORID,},
+ {"__const__", TYPE_QUAL, RID_CONST},
+ {"__volatile", TYPE_QUAL, RID_VOLATILE},
+ {"extern", SCSPEC, RID_EXTERN,},
+ {"__volatile__", TYPE_QUAL, RID_VOLATILE},
+ {"__inline", SCSPEC, RID_INLINE},
+ {"exception", AGGR, RID_EXCEPTION /* Extension */,},
+ {"__inline__", SCSPEC, RID_INLINE},
+ {"case", CASE, NORID,},
+ {"except", EXCEPT, NORID /* Extension */,},
+ {"new", NEW, NORID,},
+ {"break", BREAK, NORID,},
+ {"goto", GOTO, NORID,},
+ {"",},
+ {"__attribute", ATTRIBUTE, NORID},
+ {"",},
+ {"__attribute__", ATTRIBUTE, NORID},
+ {"this", THIS, NORID,},
+ {"raise", RAISE, NORID /* Extension */,},
+ {"class", AGGR, RID_CLASS,},
+ {"delete", DELETE, NORID,},
+ {"typeof", TYPEOF, NORID,},
+ {"typedef", SCSPEC, RID_TYPEDEF,},
+ {"for", FOR, NORID,},
+ {"raises", RAISES, NORID /* Extension */,},
+ {"__const", TYPE_QUAL, RID_CONST},
+ {"double", TYPESPEC, RID_DOUBLE,},
+ {"__typeof__", TYPEOF, NORID},
+ {"",},
+ {"switch", SWITCH, NORID,},
+ {"auto", SCSPEC, RID_AUTO,},
+ {"do", DO, NORID,},
+ {"friend", SCSPEC, RID_FRIEND,},
+ {"",},
+ {"reraise", RERAISE, NORID /* Extension */,},
+ {"",},
+ {"volatile", TYPE_QUAL, RID_VOLATILE,},
+ {"__typeof", TYPEOF, NORID},
+ {"continue", CONTINUE, NORID,},
+ {"float", TYPESPEC, RID_FLOAT,},
+ {"const", TYPE_QUAL, RID_CONST,},
+ {"static", SCSPEC, RID_STATIC,},
+ {"virtual", SCSPEC, RID_VIRTUAL,},
+ {"__asm", ASM, NORID},
+ {"short", TYPESPEC, RID_SHORT,},
+ {"signed", TYPESPEC, RID_SIGNED,},
+ {"try", TRY, NORID /* Extension */,},
+ {"",}, {"",}, {"",},
+ {"__signed__", TYPESPEC, RID_SIGNED},
+ {"catch", CATCH, NORID,},
+ {"public", PUBLIC, NORID,},
+ {"struct", AGGR, RID_RECORD,},
+ {"if", IF, NORID,},
+ {"asm", ASM, NORID,},
+ {"union", AGGR, RID_UNION,},
+ {"",},
+ {"private", PRIVATE, NORID,},
+ {"",}, {"",}, {"",},
+ {"operator", OPERATOR, NORID,},
+ {"",}, {"",}, {"",},
+ {"default", DEFAULT, NORID,},
+ {"dynamic", DYNAMIC, NORID,},
+ {"overload", OVERLOAD, NORID,},
+ {"int", TYPESPEC, RID_INT,},
+ {"char", TYPESPEC, RID_CHAR,},
+ {"",}, {"",},
+ {"return", RETURN, NORID,},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"",}, {"",},
+ {"__signed", TYPESPEC, RID_SIGNED},
+ {"",},
+ {"void", TYPESPEC, RID_VOID,},
+ {"",}, {"",}, {"",},
+ {"protected", PROTECTED, NORID,},
+ {"",},
+ {"enum", ENUM, NORID,},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"inline", SCSPEC, RID_INLINE,},
+ {"register", SCSPEC, RID_REGISTER,},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"",}, {"",}, {"",}, {"",},
+ {"unsigned", TYPESPEC, RID_UNSIGNED,},
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register const char *s = wordlist[key].name;
+
+ if (*s == *str && !strcmp (str + 1, s + 1))
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/apps/gperf/tests/test-4.exp b/apps/gperf/tests/test-4.exp
new file mode 100644
index 00000000000..5238bf94d98
--- /dev/null
+++ b/apps/gperf/tests/test-4.exp
@@ -0,0 +1,138 @@
+/* C code produced by gperf version 2.5 (GNU C++ version) */
+/* Command-line: ../src/gperf -D -p -t */
+/* Command-line: gperf -p -j1 -i 1 -g -o -t -N is_reserved_word -k1,3,$ c-parse.gperf */
+struct resword { char *name; short token; enum rid rid; };
+
+#define TOTAL_KEYWORDS 51
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 13
+#define MIN_HASH_VALUE 4
+#define MAX_HASH_VALUE 82
+/* maximum key range = 79, duplicates = 2 */
+
+static unsigned int
+hash (str, len)
+ register char *str;
+ register int unsigned len;
+{
+ static unsigned char asso_values[] =
+ {
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 83, 83, 83, 83, 83,
+ 83, 83, 83, 83, 83, 0, 83, 40, 20, 50,
+ 25, 10, 30, 0, 0, 50, 83, 0, 15, 0,
+ 35, 0, 83, 83, 20, 0, 10, 40, 5, 15,
+ 83, 83, 83, 83, 83, 83, 83, 83,
+ };
+ return len + asso_values[str[len - 1]] + asso_values[str[0]];
+}
+
+struct resword *
+in_word_set (str, len)
+ register char *str;
+ register unsigned int len;
+{
+ static struct resword wordlist[] =
+ {
+ {"",}, {"",}, {"",}, {"",},
+ {"goto", GOTO, NORID},
+ {"__asm", ASM, NORID},
+ {"switch", SWITCH, NORID},
+ {"__asm__", ASM, NORID},
+ {"__const__", TYPE_QUAL, RID_CONST},
+ {"__inline__", SCSPEC, RID_INLINE},
+ {"__typeof__", TYPEOF, NORID},
+ {"__signed__", TYPESPEC, RID_SIGNED},
+ {"__alignof__", ALIGNOF, NORID},
+ {"__volatile__", TYPE_QUAL, RID_VOLATILE},
+ {"__attribute__", ATTRIBUTE, NORID},
+ {"enum", ENUM, NORID},
+ {"short", TYPESPEC, RID_SHORT},
+ {"struct", STRUCT, NORID},
+ {"__const", TYPE_QUAL, RID_CONST},
+ {"__inline", SCSPEC, RID_INLINE},
+ {"long", TYPESPEC, RID_LONG},
+ {"__volatile", TYPE_QUAL, RID_VOLATILE},
+ {"__attribute", ATTRIBUTE, NORID},
+ {"volatile", TYPE_QUAL, RID_VOLATILE},
+ {"else", ELSE, NORID},
+ {"break", BREAK, NORID},
+ {"do", DO, NORID},
+ {"while", WHILE, NORID},
+ {"signed", TYPESPEC, RID_SIGNED},
+ {"__signed", TYPESPEC, RID_SIGNED},
+ {"void", TYPESPEC, RID_VOID},
+ {"sizeof", SIZEOF, NORID},
+ {"__typeof", TYPEOF, NORID},
+ {"__alignof", ALIGNOF, NORID},
+ {"double", TYPESPEC, RID_DOUBLE},
+ {"default", DEFAULT, NORID},
+ {"asm", ASM, NORID},
+ {"auto", SCSPEC, RID_AUTO},
+ {"float", TYPESPEC, RID_FLOAT},
+ {"typeof", TYPEOF, NORID},
+ {"typedef", SCSPEC, RID_TYPEDEF},
+ {"register", SCSPEC, RID_REGISTER},
+ {"extern", SCSPEC, RID_EXTERN},
+ {"for", FOR, NORID},
+ {"static", SCSPEC, RID_STATIC},
+ {"return", RETURN, NORID},
+ {"int", TYPESPEC, RID_INT},
+ {"case", CASE, NORID},
+ {"const", TYPE_QUAL, RID_CONST},
+ {"inline", SCSPEC, RID_INLINE},
+ {"continue", CONTINUE, NORID},
+ {"unsigned", TYPESPEC, RID_UNSIGNED},
+ {"char", TYPESPEC, RID_CHAR},
+ {"union", UNION, NORID},
+ {"if", IF, NORID},
+ };
+
+ static char lookup[] =
+ {
+ -1, -1, -1, -1, 4, 5, 6, 7, -1, 8, 100, 12, 13, 14,
+ 15, 16, 17, 18, 19, 20, 21, 22, -1, 23, 24, 25, -1, 26,
+ -9, -3, 27, 28, -1, 29, 30, -1, 31, -1, 32, 33, -1, 34,
+ 35, 36, 37, 38, 39, 40, 41, -1, -1, 42, -1, 43, -1, -1,
+ 44, -1, -1, -1, -1, 45, -1, 46, 47, 48, 49, -1, 50, -1,
+ -1, -1, -1, 51, 52, -1, -1, -1, -1, -1, 53, -1, 54,
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register int index = lookup[key];
+
+ if (index >= 0 && index < MAX_HASH_VALUE)
+ {
+ register char *s = wordlist[index].name;
+
+ if (*s == *str && !strcmp (str + 1, s + 1))
+ return &wordlist[index];
+ }
+ else if (index < 0 && index >= -MAX_HASH_VALUE)
+ return 0;
+ else
+ {
+ register int offset = key + index + (index > 0 ? -MAX_HASH_VALUE : MAX_HASH_VALUE);
+ register struct resword *base = &wordlist[-lookup[offset]];
+ register struct resword *ptr = base + -lookup[offset + 1];
+
+ while (--ptr >= base)
+ if (*str == *ptr->name && !strcmp (str + 1, ptr->name + 1))
+ return ptr;
+ }
+ }
+ }
+ return 0;
+}
diff --git a/apps/gperf/tests/test-5.exp b/apps/gperf/tests/test-5.exp
new file mode 100644
index 00000000000..101e2798d40
--- /dev/null
+++ b/apps/gperf/tests/test-5.exp
@@ -0,0 +1,111 @@
+/* C code produced by gperf version 2.5 (GNU C++ version) */
+/* Command-line: ../src/gperf -g -o -j1 -t -p -N is_reserved_word */
+/* ISO Pascal 7185 reserved words.
+ *
+ * For GNU Pascal compiler (GPC) by jtv@hut.fi
+ *
+ * run this through the Doug Schmidt's gperf program
+ * with command
+ * gperf -g -o -j1 -t -p -N is_reserved_word
+ *
+ */
+struct resword { char *name; short token; short iclass;};
+
+#define TOTAL_KEYWORDS 35
+#define MIN_WORD_LENGTH 2
+#define MAX_WORD_LENGTH 9
+#define MIN_HASH_VALUE 2
+#define MAX_HASH_VALUE 43
+/* maximum key range = 42, duplicates = 0 */
+
+#ifdef __GNUC__
+inline
+#endif
+static unsigned int
+hash (str, len)
+ register char *str;
+ register int unsigned len;
+{
+ static unsigned char asso_values[] =
+ {
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 44, 44, 44, 44, 44, 18, 29, 14, 6, 7,
+ 10, 20, 44, 28, 44, 44, 28, 19, 22, 15,
+ 0, 44, 9, 23, 0, 23, 26, 2, 44, 44,
+ 44, 44, 44, 44, 44, 44, 44, 44, 44, 44,
+ 0, 0, 13, 44, 30, 44, 44, 44, 0, 25,
+ 1, 0, 44, 44, 0, 44, 1, 44, 25, 44,
+ 44, 0, 44, 44, 44, 44, 44, 44,
+ };
+ return len + asso_values[str[len - 1]] + asso_values[str[0]];
+}
+
+#ifdef __GNUC__
+inline
+#endif
+struct resword *
+is_reserved_word (str, len)
+ register char *str;
+ register unsigned int len;
+{
+ static struct resword wordlist[] =
+ {
+ {"",}, {"",},
+ {"To", TO, PASCAL_ISO},
+ {"",},
+ {"Type", TYPE, PASCAL_ISO},
+ {"Then", THEN, PASCAL_ISO},
+ {"Packed", PACKED, PASCAL_ISO},
+ {"While", WHILE, PASCAL_ISO},
+ {"Do", DO, PASCAL_ISO},
+ {"Procedure", PROCEDURE, PASCAL_ISO},
+ {"End", END, PASCAL_ISO},
+ {"Else", ELSE, PASCAL_ISO},
+ {"Downto", DOWNTO, PASCAL_ISO},
+ {"For", FOR, PASCAL_ISO},
+ {"File", FILE_, PASCAL_ISO},
+ {"Record", RECORD, PASCAL_ISO},
+ {"Repeat", REPEAT, PASCAL_ISO},
+ {"Or", OR, PASCAL_ISO},
+ {"Case", CASE, PASCAL_ISO},
+ {"Function", FUNCTION, PASCAL_ISO},
+ {"Const", CONST, PASCAL_ISO},
+ {"And", AND, PASCAL_ISO},
+ {"Mod", MOD, PASCAL_ISO},
+ {"Array", ARRAY, PASCAL_ISO},
+ {"Goto", GOTO, PASCAL_ISO},
+ {"Nil", NIL, PASCAL_ISO},
+ {"Not", NOT, PASCAL_ISO},
+ {"Set", SET, PASCAL_ISO},
+ {"Until", UNTIL, PASCAL_ISO},
+ {"Var", VAR, PASCAL_ISO},
+ {"Of", OF, PASCAL_ISO},
+ {"In", IN, PASCAL_ISO},
+ {"Program", PROGRAM,PASCAL_ISO},
+ {"Label", LABEL, PASCAL_ISO},
+ {"Div", DIV, PASCAL_ISO},
+ {"Begin", BEGIN_, PASCAL_ISO},
+ {"With", WITH, PASCAL_ISO},
+ {"",}, {"",}, {"",}, {"",}, {"",}, {"",},
+ {"If", IF, PASCAL_ISO},
+ };
+
+ if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
+ {
+ register int key = hash (str, len);
+
+ if (key <= MAX_HASH_VALUE && key >= 0)
+ {
+ register char *s = wordlist[key].name;
+
+ if (*s == *str && !strcmp (str + 1, s + 1))
+ return &wordlist[key];
+ }
+ }
+ return 0;
+}
diff --git a/apps/gperf/tests/test-6.exp b/apps/gperf/tests/test-6.exp
new file mode 100644
index 00000000000..eba6e3cac9a
--- /dev/null
+++ b/apps/gperf/tests/test-6.exp
@@ -0,0 +1,74 @@
+-a Generate ANSI standard C output code, i.e., function prototypes.
+-c Generate comparison code using strncmp rather than strcmp.
+-C Make the contents of generated lookup tables constant, i.e., readonly.
+-d Enables the debugging option (produces verbose output to the standard error).
+-D Handle keywords that hash to duplicate values. This is useful
+ for certain highly redundant keyword sets. It enables the -S option.
+-e Allow user to provide a string containing delimiters used to separate
+ keywords from their attributes. Default is ",\n"
+-E Define constant values using an enum local to the lookup function
+ rather than with defines
+-f Generate the gen-perf.hash function ``fast.'' This decreases GPERF's
+ running time at the cost of minimizing generated table-size.
+ The numeric argument represents the number of times to iterate when
+ resolving a collision. `0' means ``iterate by the number of keywords.''
+-g Assume a GNU compiler, e.g., g++ or gcc. This makes all generated
+ routines use the ``inline'' keyword to remove cost of function calls.
+-G Generate the static table of keywords as a static global variable,
+ rather than hiding it inside of the lookup function (which is the
+ default behavior).
+-h Prints this mesage.
+-H Allow user to specify name of generated hash function. Default
+ is `hash'.
+-i Provide an initial value for the associate values array. Default is 0.
+ Setting this value larger helps inflate the size of the final table.
+-j Affects the ``jump value,'' i.e., how far to advance the associated
+ character value upon collisions. Must be an odd number, default is 5.
+-k Allows selection of the key positions used in the hash function.
+ The allowable choices range between 1-126, inclusive. The positions
+ are separated by commas, ranges may be used, and key positions may
+ occur in any order. Also, the meta-character '*' causes the generated
+ hash function to consider ALL key positions, and $ indicates the
+ ``final character'' of a key, e.g., $,1,2,4,6-10.
+-K Allow use to select name of the keyword component in the keyword structure.
+-l Compare key lengths before trying a string comparison. This helps
+ cut down on the number of string comparisons made during the lookup.
+-L Generates code in the language specified by the option's argument. Languages
+ handled are currently C++ and C. The default is C.
+-n Do not include the length of the keyword when computing the hash function
+-N Allow user to specify name of generated lookup function. Default
+ name is `in_word_set.'
+-o Reorders input keys by frequency of occurrence of the key sets.
+ This should decrease the search time dramatically.
+-p Changes the return value of the generated function ``in_word_set''
+ from its default boolean value (i.e., 0 or 1), to type ``pointer
+ to wordlist array'' This is most useful when the -t option, allowing
+ user-defined structs, is used.
+-r Utilizes randomness to initialize the associated values table.
+-s Affects the size of the generated hash table. The numeric argument
+ for this option indicates ``how many times larger or smaller'' the associated
+ value range should be, in relationship to the number of keys, e.g. a value of 3
+ means ``allow the maximum associated value to be about 3 times larger than the
+ number of input keys.'' Conversely, a value of -3 means ``make the maximum
+ associated value about 3 times smaller than the number of input keys.
+ A larger table should decrease the time required for an unsuccessful search,
+ at the expense of extra table space. Default value is 1.
+-S Causes the generated C code to use a switch statement scheme, rather
+ than an array lookup table. This can lead to a reduction in both
+ time and space requirements for some keyfiles. The argument to
+ this option determines how many switch statements are generated.
+ A value of 1 generates 1 switch containing all the elements, a value of 2
+ generates 2 tables with 1/2 the elements in each table, etc. This
+ is useful since many C compilers cannot correctly generate code for
+ large switch statements.
+-t Allows the user to include a structured type declaration for
+ generated code. Any text before %% is consider part of the type
+ declaration. Key words and additional fields may follow this, one
+ group of fields per line.
+-T Prevents the transfer of the type declaration to the output file.
+ Use this option if the type is already defined elsewhere.
+-v Prints out the current version number
+-Z Allow user to specify name of generated C++ class. Default
+ name is `Perfect_Hash.'
+Usage: ../src/gperf [-acCdDef[num]gGhH<hashname>i<init>jk<keys>K<keyname>lL<language>nN<function name>oprs<size>S<switches>tTvZ<class name>].
+(type ../src/gperf -h for help)
diff --git a/apps/gperf/tests/test-7.exp b/apps/gperf/tests/test-7.exp
new file mode 100644
index 00000000000..c5c942c10d1
--- /dev/null
+++ b/apps/gperf/tests/test-7.exp
@@ -0,0 +1,32 @@
+in word set if
+in word set do
+NOT in word set int
+in word set for
+in word set case
+NOT in word set char
+NOT in word set auto
+in word set goto
+in word set else
+NOT in word set long
+NOT in word set void
+NOT in word set enum
+NOT in word set float
+NOT in word set short
+NOT in word set union
+NOT in word set break
+in word set while
+NOT in word set const
+NOT in word set double
+NOT in word set static
+NOT in word set extern
+NOT in word set struct
+in word set return
+NOT in word set sizeof
+NOT in word set switch
+NOT in word set signed
+NOT in word set typedef
+NOT in word set default
+NOT in word set unsigned
+NOT in word set continue
+NOT in word set register
+NOT in word set volatile
diff --git a/apps/gperf/tests/test.c b/apps/gperf/tests/test.c
new file mode 100644
index 00000000000..35d9015bba7
--- /dev/null
+++ b/apps/gperf/tests/test.c
@@ -0,0 +1,56 @@
+/*
+// @(#)test.c 1.1 10/18/96
+
+   Tests the generated perfect hash function.
+   The -v option prints diagnostics as to whether a word is in
+   the set or not. Without -v the program is useful for timing.
+*/
+
+#include <stdio.h>
+#include <string.h>
+
+#define MAX_LEN 80
+
+/* Reads one candidate word per line from standard input and looks each
+   one up with in_word_set ().  When any command-line argument is given,
+   a line is printed for every word saying whether the lookup succeeded;
+   otherwise nothing is printed.  Always returns 0.
+
+   in_word_set () is not declared here; presumably it is supplied by the
+   gperf-generated code this driver is compiled together with.  */
+int
+main (argc, argv)
+     int argc;
+     char *argv[];
+{
+  int verbose = argc > 1 ? 1 : 0;
+  char buf[MAX_LEN];
+
+  /* fgets () limits the read to sizeof buf, which gets () could not do.  */
+  while (fgets (buf, sizeof buf, stdin))
+    {
+      /* Length of the word itself, excluding any trailing newline.  It is
+         kept as unsigned int because the unprototyped lookup call passes
+         it through without conversion.  */
+      unsigned int len = (unsigned int) strcspn (buf, "\n");
+
+      if (buf[len] == '\n')
+        buf[len] = '\0';  /* gets () dropped the newline; do the same.  */
+      else
+        {
+          /* No newline was stored: the line was too long or input ended.
+             Skip the remainder so it is not looked up as another word.  */
+          int c;
+
+          while ((c = getchar ()) != EOF && c != '\n')
+            continue;
+        }
+
+      if (in_word_set (buf, len) && verbose)
+        printf ("in word set %s\n", buf);
+      else if (verbose)
+        printf ("NOT in word set %s\n", buf);
+    }
+
+  return 0;
+}