summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBruno Haible <bruno@clisp.org>2003-02-10 14:21:58 +0000
committerBruno Haible <bruno@clisp.org>2003-02-10 14:21:58 +0000
commit6202aaadb1a2904f456c2ee55623bf4a1a951ad7 (patch)
tree756e087bf915a628623eb1745ea6e1861beca3d9
parentef37a53d732206d047aad6ffffa9ea88b1dbb2c3 (diff)
downloadgperf-6202aaadb1a2904f456c2ee55623bf4a1a951ad7.tar.gz
Implement % declarations.
-rw-r--r--ChangeLog30
-rw-r--r--NEWS18
-rw-r--r--doc/gperf.texi299
-rw-r--r--src/input.cc298
-rw-r--r--src/options.cc119
-rw-r--r--src/options.h26
-rw-r--r--src/options.icc11
-rw-r--r--src/output.cc10
8 files changed, 721 insertions, 90 deletions
diff --git a/ChangeLog b/ChangeLog
index 1fd6014..5b05340 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,35 @@
2002-11-16 Bruno Haible <bruno@clisp.org>
+ * src/options.h (Options::get_slot_name): Renamed from
+ Options::get_key_name.
+ (Options::set, Options::set_language, Options::set_total_switches,
+ Options::set_function_name, Options::set_slot_name,
+ Options::set_class_name, Options::set_hash_name,
+ Options::set_wordlist_name, Options::set_delimiters): New method
+ declarations.
+ (Options::_language): New field.
+ (Options::_slot_name): Renamed from Options::_key_name.
+ * src/options.icc (Options::set): New method.
+ (Options::get_slot_name): Renamed from Options::get_key_name.
+ * src/options.cc (DEFAULT_FUNCTION_NAME): Renamed from DEFAULT_NAME.
+ (DEFAULT_SLOT_NAME): Renamed from DEFAULT_KEY.
+ (Options::Options): Initialize _language. Update.
+ (Options::~Options): Update.
+ (Options::set_language, Options::set_total_switches,
+ Options::set_function_name, Options::set_slot_name,
+ Options::set_class_name, Options::set_hash_name,
+ Options::set_wordlist_name, Options::set_delimiters): New methods.
+ (Options::parse_options): Call set_language. Update.
+ * src/input.cc (is_declaration, is_declaration_with_arg,
+ is_define_declaration): New functions.
+ (Input::read_input): Accept %DECL declarations.
+ * src/output.cc (Output::output_lookup_function_body): Update.
+ * doc/gperf.texi (Declarations): Add new subnodes.
+ (User-supplied Struct, Gperf Declarations, C Code Inclusion): New
+ nodes.
+ (Keywords, Output Format, Binary Strings, Options): Mention %
+ declarations as being equivalent to the command line options.
+
* src/options.cc (Options::long_usage): Rename options -H, -N, -l, -G.
(long_options): Add --hash-function-name, --lookup-function-name,
--compare-lengths.
diff --git a/NEWS b/NEWS
index 7b58f64..23ab6da 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,24 @@ New in 2.8:
--compare-strlen --> --compare-lengths
--global --> --global-table
The older variants are still supported for backward compatibility.
+* The following options can now be specified inside the input file:
+ %delimiters=DELIMITER-LIST
+ %struct-type
+ %language=LANGUAGE-NAME
+ %define slot-name NAME
+ %define hash-function-name NAME
+ %define lookup-function-name NAME
+ %define class-name NAME
+ %7bit
+ %compare-lengths
+ %compare-strncmp
+ %readonly-tables
+ %enum
+ %includes
+ %global-table
+ %define word-array-name NAME
+ %switch=COUNT
+ %omit-struct-type
* If the input file is given by name, the output file will now contain
#line directives referring to the input file.
* Bug fixes.
diff --git a/doc/gperf.texi b/doc/gperf.texi
index 84fa53b..a0ba22c 100644
--- a/doc/gperf.texi
+++ b/doc/gperf.texi
@@ -7,7 +7,7 @@
@c some day we should @include version.texi instead of defining
@c these values at hand.
-@set UPDATED 12 November 2002
+@set UPDATED 16 November 2002
@set EDITION 2.7.2
@set VERSION 2.7.2
@c ---------------------
@@ -118,10 +118,16 @@ High-Level Description of GNU @code{gperf}
Input Format to @code{gperf}
-* Declarations:: @code{struct} Declarations and C Code Inclusion.
+* Declarations:: Declarations.
* Keywords:: Format for Keyword Entries.
* Functions:: Including Additional C Functions.
+Declarations
+
+* User-supplied Struct:: Specifying keywords with attributes.
+* Gperf Declarations:: Embedding command line options in the input.
+* C Code Inclusion:: Including C declarations and definitions.
+
Invoking @code{gperf}
* Input Details:: Options that affect Interpretation of the Input File
@@ -314,27 +320,54 @@ functions
@end group
@end example
-@emph{Unlike} @code{flex} or @code{bison}, all sections of
-@code{gperf}'s input are optional. The following sections describe the
+@emph{Unlike} @code{flex} or @code{bison}, the declarations section and
+the functions section are optional. The following sections describe the
input format for each section.
@menu
-* Declarations:: @code{struct} Declarations and C Code Inclusion.
+* Declarations:: Declarations.
* Keywords:: Format for Keyword Entries.
* Functions:: Including Additional C Functions.
@end menu
+It is possible to omit the declaration section entirely, if the @samp{-t}
+option is not given. In this case the input file begins directly with the
+first keyword line, e.g.:
+
+@example
+@group
+january
+february
+march
+april
+...
+@end group
+@end example
+
@node Declarations, Keywords, Input Format, Input Format
-@subsection @code{struct} Declarations and C Code Inclusion
+@subsection Declarations
The keyword input file optionally contains a section for including
-arbitrary C declarations and definitions, as well as provisions for
-providing a user-supplied @code{struct}. If the @samp{-t} option
+arbitrary C declarations and definitions, @code{gperf} declarations that
+act like command-line options, as well as for providing a user-supplied
+@code{struct}.
+
+@menu
+* User-supplied Struct:: Specifying keywords with attributes.
+* Gperf Declarations:: Embedding command line options in the input.
+* C Code Inclusion:: Including C declarations and definitions.
+@end menu
+
+@node User-supplied Struct, Gperf Declarations, Declarations, Declarations
+@subsubsection User-supplied @code{struct}
+
+If the @samp{-t} option (or, equivalently, the @samp{%struct-type} declaration)
@emph{is} enabled, you @emph{must} provide a C @code{struct} as the last
component in the declaration section from the input file. The first
field in this struct must be a @code{char *} or @code{const char *}
identifier called @samp{name}, although it is possible to modify this
-field's name with the @samp{-K} option described below.
+field's name with the @samp{-K} option (or, equivalently, the
+@samp{%define slot-name}) described below.
Here is a simple example, using months of the year and their attributes as
input:
@@ -364,6 +397,174 @@ other fields are a pair of consecutive percent signs, @samp{%%},
appearing left justified in the first column, as in the UNIX utility
@code{lex}.
+@node Gperf Declarations, C Code Inclusion, User-supplied Struct, Declarations
+@subsubsection Gperf Declarations
+
+The declaration section can contain @code{gperf} declarations. They
+influence the way @code{gperf} works, like command line options do.
+In fact, every such declaration is equivalent to a command line option.
+There are three forms of declarations:
+
+@enumerate
+@item
+Declarations without argument, like @samp{%compare-lengths}.
+
+@item
+Declarations with an argument, like @samp{%switch=@var{count}}.
+
+@item
+Declarations of names of entities in the output file, like
+@samp{%define lookup-function-name @var{name}}.
+@end enumerate
+
+When a declaration is given both in the input file and as a command line
+option, the command-line option's value prevails.
+
+The following @code{gperf} declarations are available.
+
+@table @samp
+@item %delimiters=@var{delimiter-list}
+@cindex @samp{%delimiters}
+Allows you to provide a string containing delimiters used to
+separate keywords from their attributes. The default is ",". This
+option is essential if you want to use keywords that have embedded
+commas or newlines.
+
+@item %struct-type
+@cindex @samp{%struct-type}
+Allows you to include a @code{struct} type declaration for generated
+code; see above for an example.
+
+@item %language=@var{language-name}
+@cindex @samp{%language}
+Instructs @code{gperf} to generate code in the language specified by the
+option's argument. Languages handled are currently:
+
+@table @samp
+@item KR-C
+Old-style K&R C. This language is understood by old-style C compilers and
+ANSI C compilers, but ANSI C compilers may flag warnings (or even errors)
+because of lacking @samp{const}.
+
+@item C
+Common C. This language is understood by ANSI C compilers, and also by
+old-style C compilers, provided that you @code{#define const} to empty
+for compilers which don't know about this keyword.
+
+@item ANSI-C
+ANSI C. This language is understood by ANSI C compilers and C++ compilers.
+
+@item C++
+C++. This language is understood by C++ compilers.
+@end table
+
+The default is C.
+
+@item %define slot-name @var{name}
+@cindex @samp{%define slot-name}
+This option is only useful when option @samp{-t} (or, equivalently, the
+@samp{%struct-type} declaration) has been given.
+By default, the program assumes the structure component identifier for
+the keyword is @samp{name}. This option allows an arbitrary choice of
+identifier for this component, although it still must occur as the first
+field in your supplied @code{struct}.
+
+@item %define hash-function-name @var{name}
+@cindex @samp{%define hash-function-name}
+Allows you to specify the name for the generated hash function. Default
+name is @samp{hash}. This option permits the use of two hash tables in
+the same file.
+
+@item %define lookup-function-name @var{name}
+@cindex @samp{%define lookup-function-name}
+Allows you to specify the name for the generated lookup function.
+Default name is @samp{in_word_set}. This option permits multiple
+generated hash functions to be used in the same application.
+
+@item %define class-name @var{name}
+@cindex @samp{%define class-name}
+This option is only useful when option @samp{-L C++} (or, equivalently,
+the @samp{%language=C++} declaration) has been given. It
+allows you to specify the name of the generated C++ class. Default name is
+@code{Perfect_Hash}.
+
+@item %7bit
+@cindex @samp{%7bit}
+This option specifies that all strings that will be passed as arguments
+to the generated hash function and the generated lookup function will
+solely consist of 7-bit ASCII characters (bytes in the range 0..127).
+(Note that the ANSI C functions @code{isalnum} and @code{isgraph} do
+@emph{not} guarantee that a byte is in this range. Only an explicit
+test like @samp{c >= 'A' && c <= 'Z'} guarantees this.)
+
+@item %compare-lengths
+@cindex @samp{%compare-lengths}
+Compare keyword lengths before trying a string comparison. This option
+is mandatory for binary comparisons (@pxref{Binary Strings}). It also might
+cut down on the number of string comparisons made during the lookup, since
+keywords with different lengths are never compared via @code{strcmp}.
+However, using @samp{%compare-lengths} might greatly increase the size of the
+generated C code if the lookup table range is large (which implies that
+the switch option @samp{-S} or @samp{%switch} is not enabled), since the length
+table contains as many elements as there are entries in the lookup table.
+
+@item %compare-strncmp
+@cindex @samp{%compare-strncmp}
+Generates C code that uses the @code{strncmp} function to perform
+string comparisons. The default action is to use @code{strcmp}.
+
+@item %readonly-tables
+@cindex @samp{%readonly-tables}
+Makes the contents of all generated lookup tables constant, i.e.,
+``readonly''. Many compilers can generate more efficient code for this
+by putting the tables in readonly memory.
+
+@item %enum
+@cindex @samp{%enum}
+Define constant values using an enum local to the lookup function rather
+than with #defines. This also means that different lookup functions can
+reside in the same file. Thanks to James Clark @code{<jjc@@ai.mit.edu>}.
+
+@item %includes
+@cindex @samp{%includes}
+Include the necessary system include file, @code{<string.h>}, at the
+beginning of the code. By default, this is not done; the user must
+include this header file himself to allow compilation of the code.
+
+@item %global-table
+@cindex @samp{%global-table}
+Generate the static table of keywords as a static global variable,
+rather than hiding it inside of the lookup function (which is the
+default behavior).
+
+@item %define word-array-name @var{name}
+@cindex @samp{%define word-array-name}
+Allows you to specify the name for the generated array containing the
+hash table. Default name is @samp{wordlist}. This option permits the
+use of two hash tables in the same file, even when the option @samp{-G}
+(or, equivalently, the @samp{%global-table} declaration) is given.
+
+@item %switch=@var{count}
+@cindex @samp{%switch}
+Causes the generated C code to use a @code{switch} statement scheme,
+rather than an array lookup table. This can lead to a reduction in both
+time and space requirements for some input files. The argument to this
+option determines how many @code{switch} statements are generated. A
+value of 1 generates 1 @code{switch} containing all the elements, a
+value of 2 generates 2 tables with 1/2 the elements in each
+@code{switch}, etc. This is useful since many C compilers cannot
+correctly generate code for large @code{switch} statements. This option
+was inspired in part by Keith Bostic's original C program.
+
+@item %omit-struct-type
+@cindex @samp{%omit-struct-type}
+Prevents the transfer of the type declaration to the output file. Use
+this option if the type is already defined elsewhere.
+@end table
+
+@node C Code Inclusion, , Gperf Declarations, Declarations
+@subsubsection C Code Inclusion
+
@cindex @samp{%@{}
@cindex @samp{%@}}
Using a syntax similar to GNU utilities @code{flex} and @code{bison}, it
@@ -389,20 +590,6 @@ march, 3, 31, 31
@end group
@end example
-It is possible to omit the declaration section entirely, if the @samp{-t}
-option is not given. In this case
-the input file begins directly with the first keyword line, e.g.:
-
-@example
-@group
-january
-february
-march
-april
-...
-@end group
-@end example
-
@node Keywords, Functions, Declarations, Input Format
@subsection Format for Keyword Entries
@@ -446,7 +633,8 @@ Additional fields may optionally follow the leading keyword. Fields
should be separated by commas, and terminate at the end of line. What
these fields mean is entirely up to you; they are used to initialize the
elements of the user-defined @code{struct} provided by you in the
-declaration section. If the @samp{-t} option is @emph{not} enabled
+declaration section. If the @samp{-t} option (or, equivalently, the
+@samp{%struct-type} declaration) is @emph{not} enabled
these fields are simply ignored. All previous examples except the last
one contain keyword attributes.
@@ -479,18 +667,21 @@ local static array. The associated values table is constructed
internally by @code{gperf} and later output as a static local C array
called @samp{hash_table}. The relevant selected positions (i.e. indices
into @var{str}) are specified via the @samp{-k} option when running
-@code{gperf}, as detailed in the @emph{Options} section below(@pxref{Options}).
+@code{gperf}, as detailed in the @emph{Options} section below (@pxref{Options}).
@end deftypefun
@deftypefun {} in_word_set (const char * @var{str}, unsigned int @var{len})
If @var{str} is in the keyword set, returns a pointer to that
-keyword. More exactly, if the option @samp{-t} was given, it returns
+keyword. More exactly, if the option @samp{-t} (or, equivalently, the
+@samp{%struct-type} declaration) was given, it returns
a pointer to the matching keyword's structure. Otherwise it returns
@code{NULL}.
@end deftypefun
-If the option @samp{-c} is not used, @var{str} must be a NUL terminated
-string of exactly length @var{len}. If @samp{-c} is used, @var{str} must
+If the option @samp{-c} (or, equivalently, the @samp{%compare-strncmp}
+declaration) is not used, @var{str} must be a NUL terminated
+string of exactly length @var{len}. If @samp{-c} (or, equivalently, the
+@samp{%compare-strncmp} declaration) is used, @var{str} must
simply be an array of @var{len} bytes and does not need to be NUL
terminated.
@@ -512,7 +703,9 @@ degree of optimization, this method often results in smaller and faster
code.
@end table
-If the @samp{-t} and @samp{-S} options are omitted, the default action
+If the @samp{-t} and @samp{-S} options (or, equivalently, the
+@samp{%struct-type} and @samp{%switch} declarations) are omitted, the default
+action
is to generate a @code{char *} array containing the keywords, together with
additional empty strings used for padding the array. By experimenting
with the various input and output options, and timing the resulting C
@@ -529,17 +722,20 @@ that the keywords in the input file must not contain NUL bytes,
and the @var{str} argument passed to @code{hash} or @code{in_word_set}
must be NUL terminated and have exactly length @var{len}.
-If option @samp{-c} is used, then the @var{str} argument does not need
+If option @samp{-c} (or, equivalently, the @samp{%compare-strncmp}
+declaration) is used, then the @var{str} argument does not need
to be NUL terminated. The code generated by @code{gperf} will only
access the first @var{len}, not @var{len+1}, bytes starting at @var{str}.
However, the keywords in the input file still must not contain NUL
bytes.
-If option @samp{-l} is used, then the hash table performs binary
+If option @samp{-l} (or, equivalently, the @samp{%compare-lengths}
+declaration) is used, then the hash table performs binary
comparison. The keywords in the input file may contain NUL bytes,
written in string syntax as @code{\000} or @code{\x00}, and the code
generated by @code{gperf} will treat NUL like any other byte.
-Also, in this case the @samp{-c} option is ignored.
+Also, in this case the @samp{-c} option (or, equivalently, the
+@samp{%compare-strncmp} declaration) is ignored.
@node Options, Bugs, Description, Top
@chapter Invoking @code{gperf}
@@ -572,11 +768,14 @@ or if it is @samp{-}.
@node Input Details, Output Language, Output File, Options
@section Options that affect Interpretation of the Input File
+These options are also available as declarations in the input file
+(@pxref{Gperf Declarations}).
+
@table @samp
@item -e @var{keyword-delimiter-list}
@itemx --delimiters=@var{keyword-delimiter-list}
@cindex Delimiters
-Allows the user to provide a string containing delimiters used to
+Allows you to provide a string containing delimiters used to
separate keywords from their attributes. The default is ",". This
option is essential if you want to use keywords that have embedded
commas or newlines. One useful trick is to use -e'TAB', where TAB is
@@ -595,6 +794,9 @@ Modula 3 and JavaScript reserved words are distributed with this release.
@node Output Language, Output Details, Input Details, Options
@section Options to specify the Language for the Output Code
+These options are also available as declarations in the input file
+(@pxref{Gperf Declarations}).
+
@table @samp
@item -L @var{generated-language-name}
@itemx --language=@var{generated-language-name}
@@ -633,20 +835,25 @@ This option is supported for compatibility with previous releases of
@node Output Details, Algorithmic Details, Output Language, Options
@section Options for fine tuning Details in the Output Code
+Most of these options are also available as declarations in the input file
+(@pxref{Gperf Declarations}).
+
@table @samp
@item -K @var{slot-name}
@itemx --slot-name=@var{slot-name}
@cindex Slot name
-This option is only useful when option @samp{-t} has been given.
+This option is only useful when option @samp{-t} (or, equivalently, the
+@samp{%struct-type} declaration) has been given.
By default, the program assumes the structure component identifier for
-the keyword is @samp{slot-name}. This option allows an arbitrary choice of
+the keyword is @samp{name}. This option allows an arbitrary choice of
identifier for this component, although it still must occur as the first
field in your supplied @code{struct}.
@item -F @var{initializers}
@itemx --initializer-suffix=@var{initializers}
@cindex Initializers
-This option is only useful when option @samp{-t} has been given.
+This option is only useful when option @samp{-t} (or, equivalently, the
+@samp{%struct-type} declaration) has been given.
It permits to specify initializers for the structure members following
@var{slot-name} in empty hash table entries. The list of initializers
should start with a comma. By default, the emitted code will
@@ -661,14 +868,14 @@ the same file.
@item -N @var{lookup-function-name}
@itemx --lookup-function-name=@var{lookup-function-name}
Allows you to specify the name for the generated lookup function.
-Default name is @samp{in_word_set}. This option permits completely
-automatic generation of perfect hash functions, especially when multiple
-generated hash functions are used in the same application.
+Default name is @samp{in_word_set}. This option permits multiple
+generated hash functions to be used in the same application.
@item -Z @var{class-name}
@itemx --class-name=@var{class-name}
@cindex Class name
-This option is only useful when option @samp{-L C++} has been given. It
+This option is only useful when option @samp{-L C++} (or, equivalently,
+the @samp{%language=C++} declaration) has been given. It
allows you to specify the name of generated C++ class. Default name is
@code{Perfect_Hash}.
@@ -691,8 +898,8 @@ cut down on the number of string comparisons made during the lookup, since
keywords with different lengths are never compared via @code{strcmp}.
However, using @samp{-l} might greatly increase the size of the
generated C code if the lookup table range is large (which implies that
-the switch option @samp{-S} is not enabled), since the length table
-contains as many elements as there are entries in the lookup table.
+the switch option @samp{-S} or @samp{%switch} is not enabled), since the length
+table contains as many elements as there are entries in the lookup table.
@item -c
@itemx --compare-strncmp
@@ -729,7 +936,7 @@ default behavior).
Allows you to specify the name for the generated array containing the
hash table. Default name is @samp{wordlist}. This option permits the
use of two hash tables in the same file, even when the option @samp{-G}
-is given.
+(or, equivalently, the @samp{%global-table} declaration) is given.
@item -S @var{total-switch-statements}
@itemx --switch=@var{total-switch-statements}
@@ -836,7 +1043,8 @@ choose the best results. This increases the running time by a factor of
Provides an initial @var{value} for the associate values array. Default
is 0. Increasing the initial value helps inflate the final table size,
possibly leading to more time efficient keyword lookups. Note that this
-option is not particularly useful when @samp{-S} is used. Also,
+option is not particularly useful when @samp{-S} (or, equivalently,
+@samp{%switch}) is used. Also,
@samp{-i} is overridden when the @samp{-r} option is used.
@item -j @var{jump-value}
@@ -896,7 +1104,8 @@ values are useful for limiting the overall size of the generated hash
table, though this usually increases the number of duplicate hash
values.
-If `generate switch' option @samp{-S} is @emph{not} enabled, the maximum
+If `generate switch' option @samp{-S} (or, equivalently, @samp{%switch}) is
+@emph{not} enabled, the maximum
associated value influences the static array table size, and a larger
table should decrease the time required for an unsuccessful search, at
the expense of extra table space.
diff --git a/src/input.cc b/src/input.cc
index a0b8f0e..fbb0bea 100644
--- a/src/input.cc
+++ b/src/input.cc
@@ -46,6 +46,187 @@ pretty_input_file_name ()
return "(standard input)";
}
+/* Returns true if the line [LINE, LINE_END) is a "%DECL" declaration without argument. */
+static bool
+is_declaration (const char *line, const char *line_end, unsigned int lineno,
+ const char *decl)
+{
+ /* Skip '%' (not re-checked here; the caller is expected to have tested it). */
+ line++;
+
+ /* Skip DECL; a '-' in DECL also matches a '_' in the input. */
+ for (const char *d = decl; *d; d++)
+ {
+ if (!(line < line_end))
+ return false;
+ if (!(*line == *d || (*d == '-' && *line == '_')))
+ return false;
+ line++;
+ }
+ if (line < line_end
+ && ((*line >= 'A' && *line <= 'Z')
+ || (*line >= 'a' && *line <= 'z')
+ || *line == '-' || *line == '_'))
+ return false;
+
+ /* OK, found DECL, and no letter, '-' or '_' follows it. */
+
+ /* Skip spaces and tabs. */
+ while (line < line_end && (*line == ' ' || *line == '\t'))
+ line++;
+
+ /* Expect end of line; anything else is reported with LINENO and is fatal. */
+ if (line < line_end && *line != '\n')
+ {
+ fprintf (stderr, "%s:%u: junk after declaration\n",
+ pretty_input_file_name (), lineno);
+ exit (1);
+ }
+
+ return true;
+}
+
+/* Tests if the line [LINE, LINE_END) contains a "%DECL=ARG" declaration.
+ If yes, it stores a NUL-terminated copy of ARG, allocated with new[], in
+ *ARGP and returns true. Otherwise, it returns false and leaves *ARGP alone. */
+static bool
+is_declaration_with_arg (const char *line, const char *line_end,
+ unsigned int lineno,
+ const char *decl, char **argp)
+{
+ /* Skip '%' (not re-checked here; the caller is expected to have tested it). */
+ line++;
+
+ /* Skip DECL; a '-' in DECL also matches a '_' in the input. */
+ for (const char *d = decl; *d; d++)
+ {
+ if (!(line < line_end))
+ return false;
+ if (!(*line == *d || (*d == '-' && *line == '_')))
+ return false;
+ line++;
+ }
+ if (line < line_end
+ && ((*line >= 'A' && *line <= 'Z')
+ || (*line >= 'a' && *line <= 'z')
+ || *line == '-' || *line == '_'))
+ return false;
+
+ /* OK, found DECL, and no letter, '-' or '_' follows it. */
+
+ /* Skip '='; it must follow DECL immediately, otherwise the error is fatal. */
+ if (!(line < line_end && *line == '='))
+ {
+ fprintf (stderr, "%s:%u: missing argument in %%%s=ARG declaration.\n",
+ pretty_input_file_name (), lineno, decl);
+ exit (1);
+ }
+ line++;
+
+ /* The next word is the argument (it is empty if the line ends right here). */
+ char *arg = new char[line_end - line + 1];
+ char *p = arg;
+ while (line < line_end && !(*line == ' ' || *line == '\t' || *line == '\n'))
+ *p++ = *line++;
+ *p = '\0';
+
+ /* Skip spaces and tabs. */
+ while (line < line_end && (*line == ' ' || *line == '\t'))
+ line++;
+
+ /* Expect end of line; anything else is reported with LINENO and is fatal. */
+ if (line < line_end && *line != '\n')
+ {
+ fprintf (stderr, "%s:%u: junk after declaration\n",
+ pretty_input_file_name (), lineno);
+ exit (1);
+ }
+
+ *argp = arg;
+ return true;
+}
+
+/* Tests if the line [LINE, LINE_END) contains a "%define DECL ARG" declaration.
+ If yes, it stores a NUL-terminated copy of ARG, allocated with new[], in
+ *ARGP and returns true. Otherwise, it returns false and leaves *ARGP alone. */
+static bool
+is_define_declaration (const char *line, const char *line_end,
+ unsigned int lineno,
+ const char *decl, char **argp)
+{
+ /* Skip '%' (not re-checked here; the caller is expected to have tested it). */
+ line++;
+
+ /* Skip "define", which must be followed by a space or tab. */
+ {
+ for (const char *d = "define"; *d; d++)
+ {
+ if (!(line < line_end))
+ return false;
+ if (!(*line == *d))
+ return false;
+ line++;
+ }
+ if (!(line < line_end && (*line == ' ' || *line == '\t')))
+ return false;
+ }
+
+ /* Skip the spaces and tabs between "define" and DECL. */
+ while (line < line_end && (*line == ' ' || *line == '\t'))
+ line++;
+
+ /* Skip DECL; a '-' in DECL also matches a '_' in the input. */
+ for (const char *d = decl; *d; d++)
+ {
+ if (!(line < line_end))
+ return false;
+ if (!(*line == *d || (*d == '-' && *line == '_')))
+ return false;
+ line++;
+ }
+ if (line < line_end
+ && ((*line >= 'A' && *line <= 'Z')
+ || (*line >= 'a' && *line <= 'z')
+ || *line == '-' || *line == '_'))
+ return false;
+
+ /* OK, found DECL, and no letter, '-' or '_' follows it. */
+
+ /* Skip whitespace; at least one space or tab must separate DECL from ARG. */
+ if (!(line < line_end && (*line == ' ' || *line == '\t')))
+ {
+ fprintf (stderr, "%s:%u:"
+ " missing argument in %%define %s ARG declaration.\n",
+ pretty_input_file_name (), lineno, decl);
+ exit (1);
+ }
+ do
+ line++;
+ while (line < line_end && (*line == ' ' || *line == '\t'));
+
+ /* The next word is the argument (it is empty if the line ends right here). */
+ char *arg = new char[line_end - line + 1];
+ char *p = arg;
+ while (line < line_end && !(*line == ' ' || *line == '\t' || *line == '\n'))
+ *p++ = *line++;
+ *p = '\0';
+
+ /* Skip spaces and tabs. */
+ while (line < line_end && (*line == ' ' || *line == '\t'))
+ line++;
+
+ /* Expect end of line; anything else is reported with LINENO and is fatal. */
+ if (line < line_end && *line != '\n')
+ {
+ fprintf (stderr, "%s:%u: junk after declaration\n",
+ pretty_input_file_name (), lineno);
+ exit (1);
+ }
+
+ *argp = arg;
+ return true;
+}
+
/* Reads the entire input file. */
void
Input::read_input ()
@@ -208,18 +389,18 @@ Input::read_input ()
char *struct_decl = NULL;
unsigned int *struct_decl_linenos = NULL;
unsigned int struct_decl_linecount = 0;
- for (const char *p = declarations; p < declarations_end; )
+ for (const char *line = declarations; line < declarations_end; )
{
const char *line_end;
- line_end = (const char *) memchr (p, '\n', declarations_end - p);
+ line_end = (const char *) memchr (line, '\n', declarations_end - line);
if (line_end != NULL)
line_end++;
else
line_end = declarations_end;
- if (*p == '%')
+ if (*line == '%')
{
- if (p[1] == '{')
+ if (line[1] == '{')
{
/* Handle %{. */
if (_verbatim_declarations != NULL)
@@ -231,10 +412,10 @@ Input::read_input ()
pretty_input_file_name (), lineno);
exit (1);
}
- _verbatim_declarations = p + 2;
+ _verbatim_declarations = line + 2;
_verbatim_declarations_lineno = lineno;
}
- else if (p[1] == '}')
+ else if (line[1] == '}')
{
/* Handle %}. */
if (_verbatim_declarations == NULL)
@@ -251,11 +432,11 @@ Input::read_input ()
pretty_input_file_name (), lineno);
exit (1);
}
- _verbatim_declarations_end = p;
+ _verbatim_declarations_end = line;
/* Give a warning if the rest of the line is nonempty. */
bool nonempty_line = false;
const char *q;
- for (q = p + 2; q < line_end; q++)
+ for (q = line + 2; q < line_end; q++)
{
if (*q == '\n')
{
@@ -280,9 +461,98 @@ Input::read_input ()
}
else
{
- fprintf (stderr, "%s:%u: unrecognized %% directive\n",
- pretty_input_file_name (), lineno);
- exit (1);
+ char *arg;
+
+ if (is_declaration_with_arg (line, line_end, lineno,
+ "delimiters", &arg))
+ option.set_delimiters (arg);
+ else
+
+ if (is_declaration (line, line_end, lineno, "struct-type"))
+ option.set (TYPE);
+ else
+
+ if (is_declaration_with_arg (line, line_end, lineno,
+ "language", &arg))
+ option.set_language (arg);
+ else
+
+ if (is_define_declaration (line, line_end, lineno,
+ "slot-name", &arg))
+ option.set_slot_name (arg);
+ else
+
+ if (is_define_declaration (line, line_end, lineno,
+ "hash-function-name", &arg))
+ option.set_hash_name (arg);
+ else
+
+ if (is_define_declaration (line, line_end, lineno,
+ "lookup-function-name", &arg))
+ option.set_function_name (arg);
+ else
+
+ if (is_define_declaration (line, line_end, lineno,
+ "class-name", &arg))
+ option.set_class_name (arg);
+ else
+
+ if (is_declaration (line, line_end, lineno, "7bit"))
+ option.set (SEVENBIT);
+ else
+
+ if (is_declaration (line, line_end, lineno, "compare-lengths"))
+ option.set (LENTABLE);
+ else
+
+ if (is_declaration (line, line_end, lineno, "compare-strncmp"))
+ option.set (COMP);
+ else
+
+ if (is_declaration (line, line_end, lineno, "readonly-tables"))
+ option.set (CONST);
+ else
+
+ if (is_declaration (line, line_end, lineno, "enum"))
+ option.set (ENUM);
+ else
+
+ if (is_declaration (line, line_end, lineno, "includes"))
+ option.set (INCLUDE);
+ else
+
+ if (is_declaration (line, line_end, lineno, "global-table"))
+ option.set (GLOBAL);
+ else
+
+ if (is_define_declaration (line, line_end, lineno,
+ "word-array-name", &arg))
+ option.set_wordlist_name (arg);
+ else
+
+ if (is_declaration_with_arg (line, line_end, lineno,
+ "switch", &arg))
+ {
+ option.set_total_switches (atoi (arg));
+ if (option.get_total_switches () <= 0)
+ {
+ fprintf (stderr, "%s:%u: number of switches %s"
+ " must be a positive number\n",
+ pretty_input_file_name (), lineno, arg);
+ exit (1);
+ }
+ }
+ else
+
+ if (is_declaration (line, line_end, lineno, "omit-struct-type"))
+ option.set (NOTYPE);
+ else
+
+ {
+ fprintf (stderr, "%s:%u: unrecognized %% directive\n",
+ pretty_input_file_name (), lineno);
+ exit (1);
+ }
}
}
else if (!(_verbatim_declarations != NULL
@@ -290,12 +560,12 @@ Input::read_input ()
{
/* Append the line to struct_decl. */
size_t old_len = (struct_decl ? strlen (struct_decl) : 0);
- size_t line_len = line_end - p;
+ size_t line_len = line_end - line;
size_t new_len = old_len + line_len + 1;
char *new_struct_decl = new char[new_len];
if (old_len > 0)
memcpy (new_struct_decl, struct_decl, old_len);
- memcpy (new_struct_decl + old_len, p, line_len);
+ memcpy (new_struct_decl + old_len, line, line_len);
new_struct_decl[old_len + line_len] = '\0';
if (struct_decl)
delete[] struct_decl;
@@ -314,7 +584,7 @@ Input::read_input ()
struct_decl_linecount++;
}
lineno++;
- p = line_end;
+ line = line_end;
}
if (_verbatim_declarations != NULL && _verbatim_declarations_end == NULL)
{
diff --git a/src/options.cc b/src/options.cc
index 887bc8b..7bbe27e 100644
--- a/src/options.cc
+++ b/src/options.cc
@@ -41,10 +41,10 @@ const char *program_name;
static const int DEFAULT_JUMP_VALUE = 5;
/* Default name for generated lookup function. */
-static const char *const DEFAULT_NAME = "in_word_set";
+static const char *const DEFAULT_FUNCTION_NAME = "in_word_set";
/* Default name for the key component. */
-static const char *const DEFAULT_KEY = "name";
+static const char *const DEFAULT_SLOT_NAME = "name";
/* Default struct initializer suffix. */
static const char *const DEFAULT_INITIALIZER_SUFFIX = "";
@@ -428,14 +428,15 @@ Options::Options ()
: _option_word (C),
_input_file_name (NULL),
_output_file_name (NULL),
+ _language (NULL),
_iterations (0),
_jump (DEFAULT_JUMP_VALUE),
_initial_asso_value (0),
_asso_iterations (0),
_total_switches (1),
_size_multiple (1),
- _function_name (DEFAULT_NAME),
- _key_name (DEFAULT_KEY),
+ _function_name (DEFAULT_FUNCTION_NAME),
+ _slot_name (DEFAULT_SLOT_NAME),
_initializer_suffix (DEFAULT_INITIALIZER_SUFFIX),
_class_name (DEFAULT_CLASS_NAME),
_hash_name (DEFAULT_HASH_NAME),
@@ -476,7 +477,7 @@ Options::~Options ()
"\nlookup function name = %s"
"\nhash function name = %s"
"\nword list name = %s"
- "\nkey name = %s"
+ "\nslot name = %s"
"\ninitializer suffix = %s"
"\nasso_values iterations = %d"
"\njump value = %d"
@@ -505,7 +506,7 @@ Options::~Options ()
_option_word & INCLUDE ? "enabled" : "disabled",
_option_word & SEVENBIT ? "enabled" : "disabled",
_iterations,
- _function_name, _hash_name, _wordlist_name, _key_name,
+ _function_name, _hash_name, _wordlist_name, _slot_name,
_initializer_suffix, _asso_iterations, _jump, _size_multiple,
_initial_asso_value, _delimiters, _total_switches);
if (_option_word & ALLCHARS)
@@ -528,6 +529,91 @@ Options::~Options ()
}
+/* Selects the output language named by LANGUAGE.
+   Does nothing when a language has already been recorded in _language.
+   An unrecognized name is reported on stderr and treated as "C".  */
+void
+Options::set_language (const char *language)
+{
+  if (_language != NULL)
+    return;
+
+  _language = language;
+
+  /* Work out which language flag corresponds to the given name.  */
+  int language_flag;
+  if (strcmp (language, "KR-C") == 0)
+    language_flag = KRC;
+  else if (strcmp (language, "C") == 0)
+    language_flag = C;
+  else if (strcmp (language, "ANSI-C") == 0)
+    language_flag = ANSIC;
+  else if (strcmp (language, "C++") == 0)
+    language_flag = CPLUSPLUS;
+  else
+    {
+      fprintf (stderr, "unsupported language option %s, defaulting to C\n",
+               language);
+      language_flag = C;
+    }
+
+  /* Clear every language flag, then install the chosen one.  */
+  _option_word = (_option_word & ~(KRC | C | ANSIC | CPLUSPLUS)) | language_flag;
+}
+
+/* Turns on the SWITCH option and records TOTAL_SWITCHES as the number of
+   switch statements.  Has no effect when SWITCH is already on.
+   The value is stored as given; it is not range-checked here.  */
+void
+Options::set_total_switches (int total_switches)
+{
+  if (_option_word & SWITCH)
+    return;
+
+  _total_switches = total_switches;
+  _option_word |= SWITCH;
+}
+
+/* Overrides the name of the generated lookup function with NAME.
+   The override only happens while _function_name still points at
+   DEFAULT_FUNCTION_NAME; the test compares pointers, not string
+   contents.  */
+void
+Options::set_function_name (const char *name)
+{
+  if (_function_name != DEFAULT_FUNCTION_NAME)
+    return;
+
+  _function_name = name;
+}
+
+/* Sets the keyword slot name to NAME.
+   The override only happens while _slot_name still points at
+   DEFAULT_SLOT_NAME; the test compares pointers, not string contents.  */
+void
+Options::set_slot_name (const char *name)
+{
+  if (_slot_name != DEFAULT_SLOT_NAME)
+    return;
+
+  _slot_name = name;
+}
+
+/* Sets the generated class name to NAME.
+   The override only happens while _class_name still equals
+   DEFAULT_CLASS_NAME.  */
+void
+Options::set_class_name (const char *name)
+{
+  if (_class_name != DEFAULT_CLASS_NAME)
+    return;
+
+  _class_name = name;
+}
+
+/* Sets the hash function name to NAME.
+   The override only happens while _hash_name still equals
+   DEFAULT_HASH_NAME.  */
+void
+Options::set_hash_name (const char *name)
+{
+  if (_hash_name != DEFAULT_HASH_NAME)
+    return;
+
+  _hash_name = name;
+}
+
+/* Sets the hash table array name to NAME.
+   The override only happens while _wordlist_name still equals
+   DEFAULT_WORDLIST_NAME.  */
+void
+Options::set_wordlist_name (const char *name)
+{
+  if (_wordlist_name != DEFAULT_WORDLIST_NAME)
+    return;
+
+  _wordlist_name = name;
+}
+
+/* Sets the delimiters string to DELIMITERS.
+   The override only happens while _delimiters still equals
+   DEFAULT_DELIMITERS.  */
+void
+Options::set_delimiters (const char *delimiters)
+{
+  if (_delimiters != DEFAULT_DELIMITERS)
+    return;
+
+  _delimiters = delimiters;
+}
+
+
/* Parses the command line Options and sets appropriate flags in option_word. */
static const struct option long_options[] =
@@ -737,7 +823,7 @@ Options::parse_options (int argc, char *argv[])
}
case 'K': /* Make this the keyname for the keyword component field. */
{
- _key_name = /*getopt*/optarg;
+ _slot_name = /*getopt*/optarg;
break;
}
case 'l': /* Create length table to avoid extra string compares. */
@@ -747,20 +833,8 @@ Options::parse_options (int argc, char *argv[])
}
case 'L': /* Deal with different generated languages. */
{
- _option_word &= ~(KRC | C | ANSIC | CPLUSPLUS);
- if (!strcmp (/*getopt*/optarg, "KR-C"))
- _option_word |= KRC;
- else if (!strcmp (/*getopt*/optarg, "C"))
- _option_word |= C;
- else if (!strcmp (/*getopt*/optarg, "ANSI-C"))
- _option_word |= ANSIC;
- else if (!strcmp (/*getopt*/optarg, "C++"))
- _option_word |= CPLUSPLUS;
- else
- {
- fprintf (stderr, "unsupported language option %s, defaulting to C\n", /*getopt*/optarg);
- _option_word |= C;
- }
+ _language = NULL;
+ set_language (/*getopt*/optarg);
break;
}
case 'm': /* Multiple iterations for finding good asso_values. */
@@ -805,7 +879,8 @@ Options::parse_options (int argc, char *argv[])
case 'S': /* Generate switch statement output, rather than lookup table. */
{
_option_word |= SWITCH;
- if ((_total_switches = atoi (/*getopt*/optarg)) <= 0)
+ _total_switches = atoi (/*getopt*/optarg);
+ if (_total_switches <= 0)
{
fprintf (stderr, "number of switches %s must be a positive number\n", /*getopt*/optarg);
short_usage (stderr);
diff --git a/src/options.h b/src/options.h
index a8d9468..edb1fca 100644
--- a/src/options.h
+++ b/src/options.h
@@ -180,6 +180,8 @@ public:
/* Tests a given boolean option. Returns true if set, false otherwise. */
bool operator[] (Option_Type option) const;
+ /* Sets a given boolean option. */
+ void set (Option_Type option);
/* Returns the input file name. */
const char * get_input_file_name () const;
@@ -187,6 +189,9 @@ public:
/* Returns the output file name. */
const char * get_output_file_name () const;
+ /* Sets the output language, if not already set. */
+ void set_language (const char *language);
+
/* Returns the iterations value. */
int get_iterations () const;
@@ -201,30 +206,44 @@ public:
/* Returns the total number of switch statements to generate. */
int get_total_switches () const;
+ /* Sets the total number of switch statements, if not already set. */
+ void set_total_switches (int total_switches);
/* Returns the factor by which to multiply the generated table's size. */
int get_size_multiple () const;
/* Returns the generated function name. */
const char * get_function_name () const;
+ /* Sets the generated function name, if not already set. */
+ void set_function_name (const char *name);
/* Returns the keyword key name. */
- const char * get_key_name () const;
+ const char * get_slot_name () const;
+ /* Sets the keyword slot name, if not already set. */
+ void set_slot_name (const char *name);
/* Returns the struct initializer suffix. */
const char * get_initializer_suffix () const;
/* Returns the generated class name. */
const char * get_class_name () const;
+ /* Sets the generated class name, if not already set. */
+ void set_class_name (const char *name);
/* Returns the hash function name. */
const char * get_hash_name () const;
+ /* Sets the hash function name, if not already set. */
+ void set_hash_name (const char *name);
/* Returns the hash table array name. */
const char * get_wordlist_name () const;
+ /* Sets the hash table array name, if not already set. */
+ void set_wordlist_name (const char *name);
/* Returns the string used to delimit keywords from other attributes. */
const char * get_delimiters () const;
+ /* Sets the delimiters string, if not already set. */
+ void set_delimiters (const char *delimiters);
/* Returns key positions.
Only to be called if !options[ALLCHARS]. */
@@ -256,6 +275,9 @@ private:
/* Name of output file. */
char * _output_file_name;
+ /* The output language. */
+ const char * _language;
+
/* Amount to iterate when a collision occurs. */
int _iterations;
@@ -278,7 +300,7 @@ private:
const char * _function_name;
/* Name used for keyword key. */
- const char * _key_name;
+ const char * _slot_name;
/* Suffix for empty struct initializers. */
const char * _initializer_suffix;
diff --git a/src/options.icc b/src/options.icc
index d4069f6..5b7e9b1 100644
--- a/src/options.icc
+++ b/src/options.icc
@@ -125,6 +125,13 @@ Options::operator[] (Option_Type option) const
return _option_word & option;
}
+/* Turns on the boolean option OPTION by OR-ing it into the option word;
+   all other bits of the option word are left as they are.  */
+INLINE void
+Options::set (Option_Type option)
+{
+  _option_word = _option_word | option;
+}
+
/* Returns the input file name. */
INLINE const char *
Options::get_input_file_name () const
@@ -190,9 +197,9 @@ Options::get_function_name () const
/* Returns the keyword key name. */
INLINE const char *
-Options::get_key_name () const
+Options::get_slot_name () const
{
- return _key_name;
+ return _slot_name;
}
/* Returns the struct initializer suffix. */
diff --git a/src/output.cc b/src/output.cc
index e581f95..eaf3260 100644
--- a/src/output.cc
+++ b/src/output.cc
@@ -1210,7 +1210,7 @@ Output::output_lookup_function_body (const Output_Compare& comparison) const
printf ("%*s register %schar *s = ",
indent, "", const_always);
if (option[TYPE])
- printf ("wordptr->%s", option.get_key_name ());
+ printf ("wordptr->%s", option.get_slot_name ());
else
printf ("*wordptr");
printf (";\n\n"
@@ -1241,7 +1241,7 @@ Output::output_lookup_function_body (const Output_Compare& comparison) const
printf (" {\n"
" register %schar *s = resword->%s;\n\n"
" if (",
- const_always, option.get_key_name ());
+ const_always, option.get_slot_name ());
comparison.output_comparison (Output_Expr1 ("str"), Output_Expr1 ("s"));
printf (")\n"
" return resword;\n"
@@ -1279,7 +1279,7 @@ Output::output_lookup_function_body (const Output_Compare& comparison) const
indent, "",
indent, "", const_always, option.get_wordlist_name ());
if (option[TYPE])
- printf (".%s", option.get_key_name ());
+ printf (".%s", option.get_slot_name ());
printf (";\n\n"
"%*s if (",
indent, "");
@@ -1330,7 +1330,7 @@ Output::output_lookup_function_body (const Output_Compare& comparison) const
printf ("%*s register %schar *s = ",
indent, "", const_always);
if (option[TYPE])
- printf ("wordptr->%s", option.get_key_name ());
+ printf ("wordptr->%s", option.get_slot_name ());
else
printf ("*wordptr");
printf (";\n\n"
@@ -1374,7 +1374,7 @@ Output::output_lookup_function_body (const Output_Compare& comparison) const
indent, "", const_always, option.get_wordlist_name ());
if (option[TYPE])
- printf (".%s", option.get_key_name ());
+ printf (".%s", option.get_slot_name ());
printf (";\n\n"
"%*s if (",