diff options
author | H. Peter Anvin <hpa@zytor.com> | 2017-04-03 00:09:58 -0700 |
---|---|---|
committer | H. Peter Anvin <hpa@zytor.com> | 2017-04-03 00:27:07 -0700 |
commit | 5253f58c3679b9b21006567ad26a419904791645 (patch) | |
tree | 538cd02d8ea06b042e97ecf548366883fb9a31ba /nasmlib | |
parent | b1a5b26477db4ce4270b8de75c36b8f9acfc3f69 (diff) | |
download | nasm-5253f58c3679b9b21006567ad26a419904791645.tar.gz |
Add generic perfect string hashes, use for directives
Add a generic facility for generating perfect string hashes, where all
that is needed is an enum and a string table. The existing mechanism
using a custom Perl script wrapped around a module continues to be
available for any use case where this particular approach isn't
sophisticated enough.
Much of this patch comes from renaming "enum directives" to "enum
directive" as a result of the string hash generator expecting a set of
uniform naming conventions.
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Diffstat (limited to 'nasmlib')
-rw-r--r-- | nasmlib/badenum.c | 43 | ||||
-rw-r--r-- | nasmlib/perfhash.c | 55 | ||||
-rwxr-xr-x | nasmlib/perfhash.pl | 362 |
3 files changed, 460 insertions, 0 deletions
diff --git a/nasmlib/badenum.c b/nasmlib/badenum.c new file mode 100644 index 00000000..6f880c85 --- /dev/null +++ b/nasmlib/badenum.c @@ -0,0 +1,43 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2017 The NASM Authors - All Rights Reserved + * See the file AUTHORS included with the NASM distribution for + * the specific copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ----------------------------------------------------------------------- */ + +#include "nasmlib.h" + +/* Used to avoid returning NULL to a debug printing function */ +const char *invalid_enum_str(int x) +{ + static char buf[64]; + + snprintf(buf, sizeof buf, "<invalid %d>", x); + return buf; +} diff --git a/nasmlib/perfhash.c b/nasmlib/perfhash.c new file mode 100644 index 00000000..5cd6714e --- /dev/null +++ b/nasmlib/perfhash.c @@ -0,0 +1,55 @@ +/* ----------------------------------------------------------------------- * + * + * Copyright 2017 The NASM Authors - All Rights Reserved + * See the file AUTHORS included with the NASM distribution for + * the specific copyright holders. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following + * conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ----------------------------------------------------------------------- */ + +#include "perfhash.h" +#include "hashtbl.h" /* For crc64i() */ + +int perfhash_find(const struct perfect_hash *hash, const char *str) +{ + uint32_t k1, k2; + uint64_t crc; + uint16_t ix; + + crc = crc64i(hash->crcinit, str); + k1 = (uint32_t)crc & hash->hashmask; + k2 = (uint32_t)(crc >> 32) & hash->hashmask; + + ix = hash->hashvals[k1] + hash->hashvals[k2 + hash->hashmask + 1]; + + if (ix >= hash->tbllen || + !hash->strings[ix] || + nasm_stricmp(str, hash->strings[ix])) + return hash->errval; + + return hash->tbloffs + ix; +} diff --git a/nasmlib/perfhash.pl b/nasmlib/perfhash.pl new file mode 100755 index 00000000..639b347f --- /dev/null +++ b/nasmlib/perfhash.pl @@ -0,0 +1,362 @@ +#!/usr/bin/perl +## -------------------------------------------------------------------------- +## +## Copyright 1996-2017 The NASM Authors - All Rights Reserved +## See the file AUTHORS included with the NASM distribution for +## the specific copyright holders. +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted provided that the following +## conditions are met: +## +## * Redistributions of source code must retain the above copyright +## notice, this list of conditions and the following disclaimer. +## * Redistributions in binary form must reproduce the above +## copyright notice, this list of conditions and the following +## disclaimer in the documentation and/or other materials provided +## with the distribution. +## +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +## CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +## INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +## DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +## NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +## LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +## CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +## OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +## +## -------------------------------------------------------------------------- + +# +# Generate a perfect hash for general case-insensitive string-to-enum +# lookup. This generates an enum and the corresponding hash, but +# relies on a common function to parse the hash. +# +# Usage: +# perfhash.pl h foohash.dat foohash.h (to generate C header) +# perfhash.pl c foohash.dat foohash.c (to generate C source) +# + +use strict; + +require 'phash.ph'; + +sub basename($) { + my($s) = @_; + $s =~ s/^.*[^-[:alnum:]_\.]//; # Remove path component as best we can + return $s; +} + +sub intval($) { + my($s) = @_; + + if ($s =~ /^0/) { + return oct($s); # Handles octal or hexadecimal + } elsif ($s =~ /^\-(0.*)$/) { + return -oct($1); + } else { + return $s + 0; # Forcibly convert to number + } +} + +my($output, $infile, $outfile) = @ARGV; +my $me = basename($0); + +# The following special things are allowed in the input file: +# #<space> or ; begins a comment +# #include filename +# #name str +# The name of the hash +# #prefix str +# Defines the prefix before enum +# #guard str +# Defines the header guard string +# #special str [= value] +# Generate an enum value without a corresponding string; not capitalized. +# #header str +# Indicates the name of the .h file to include from the .c file +# #errval str +# Define the value to be returned if a string is not found +# (defaults to -1). This can be any constant C expression, +# including one of the enum values. +# +# Regular lines are just str [= value] +# +# Enumeration is generated in the order listed in the file, just as in C +# specifying a value causes the values to increase by 1 from that point on +# unless specified. + +my $name; +my $prefix; +my $guard; +my $hfile; + +my %strings = (); +my %specials = (); +my $next_value = 0; +my $errval = '-1'; + +my @incstack = (); +my @filenames = ($infile); +my @linenums = (0); +my $dd = undef; +my $err = 0; + +while (scalar(@filenames)) { + if (!defined($dd)) { + open($dd, '<', $filenames[-1]) + or die "$0: cannot open: $filenames[-1]: $!\n"; + } + + my $line = <$dd>; + if (!defined($line)) { + close($dd); + $dd = pop @incstack; + pop @filenames; + pop @linenums; + next; + } + + $linenums[-1]++; + + chomp $line; + $line =~ s/\s*(|\;.*|\#\s.*|\#)$//; # Remove comments and trailing space + $line =~ s/^\s+//; # Remove leading space + if ($line eq '') { + # Do nothing + } elsif ($line =~ /^\#name\s+([[:alnum:]_]+)$/) { + $name = $1; + } elsif ($line =~ /^\#prefix\s+([[:alnum:]_]+)$/) { + $prefix = $1; + } elsif ($line =~ /^\#guard\s+([[:alnum:]_]+)$/) { + $guard = $1; + } elsif ($line =~ /^\#errval\s+(\S.*)$/) { + $errval = $1; + } elsif ($line =~ /^\#header\s+(\"(.+)\"|\S+)$/) { + $hfile = ($2 ne '') ? $2 : $1; + } elsif ($line =~ /^\#include\s+(\"(.+)\"|\S+)$/) { + push @incstack, $dd; + push @filenames, (($2 ne '') ? $2 : $1); + push @linenums, 0; + undef $dd; # Open a new file + } elsif ($line =~ /^(|\#special\s+)(\S+)\s*(|=\s*(\-?(0[Xx][[:xdigit:]]+|0[0-7]*|[0-9]+)))$/) { + $next_value = intval($4) if ($4 ne ''); + if ($1 eq '') { + $strings{$2} = $next_value++; + } else { + $specials{$2} = $next_value++; + } + } else { + printf STDERR "%s:%d:%s syntax error: \"%s\"\n", + $filenames[-1], $linenums[-1], + (scalar(@incstack) == 1) ? '' : "(from $infile)", $line; + $err++; + } +} + +exit 1 if ($err); + +# Default name, prefix, and header guard name +if (!defined($name)) { + $name = basename($infile); + $name =~ s/(\..*)$//; # Strip extension, if any +} +if (!defined($prefix)) { + $prefix = "\U${name}\E_"; +} +if (!defined($hfile)) { + $hfile = $outfile; + $hfile =~ s/\.c$/\.h/; +} +if (!defined($guard)) { + $guard = basename($hfile); + $guard =~ s/[^[:alnum:]_]/_/g; + $guard =~ s/__+/_/g; + $guard = "\U$guard"; +} + +# Verify input. We can't have more than one constant with the same +# enumeration value, nor the same enumeration string. +if (scalar(keys(%strings)) == 0) { + die "$0: $infile: no strings to hash!\n"; +} + +my %enums; +my %enumvals; +my %stringbyval; +my $max_enum; +my $tbllen = 0; +my $tbloffs; +foreach my $s (keys(%strings)) { + my $es = "${prefix}\U${s}"; + $es =~ s/[^[:alnum:]_]/_/g; + $es =~ s/__+/_/g; + my $v = $strings{$s}; + $stringbyval{$v} = $s; + if (defined($enums{$es})) { + printf STDERR "%s: string \"%s\" duplicates existing enum %s\n", + $infile, $s, $es; + $err++; + } else { + $enums{$es} = $v; + } + if (defined($enumvals{$v})) { + printf STDERR "%s: string \"%s\" duplicates existing enum constant %d\n", $v; + $err++; + } else { + $enumvals{$v} = $es; + } + $max_enum = $v if ($v > $max_enum || !defined($max_enum)); + $tbloffs = $v if ($v < $tbloffs || !defined($tbloffs)); + $tbllen = $v+1 if ($v >= $tbllen || !defined($tbllen)); +} +foreach my $s (keys(%specials)) { + my $es = $prefix . $s; # No string mangling here + my $v = $specials{$s}; + if (defined($enums{$es})) { + printf STDERR "%s: special \"%s\" duplicates existing enum %s\n", + $infile, $s, $es; + $err++; + } else { + $enums{$es} = $v; + } + if (defined ($enumvals{$v})) { + printf STDERR "%s: special \"%s\" duplicates existing enum constant %d\n", $v; + $err++; + } else { + $enumvals{$v} = $es; + } + $max_enum = $v if ($v > $max_enum || !defined($max_enum)); +} + +$tbllen -= $tbloffs; +if ($tbllen > 65536) { + printf STDERR "%s: span of enumeration values too large\n"; + $err++; +} + +exit 1 if ($err); + +open(F, '>', $outfile) + or die "$0: cannot create: ${outfile}: $!\n"; + +if ($output eq 'h') { + print F "/*\n"; + print F " * This file is generated from $infile\n"; + print F " * by $me; do not edit.\n"; + print F " */\n"; + print F "\n"; + + print F "#ifndef $guard\n"; + print F "#define $guard 1\n\n"; + print F "#include \"perfhash.h\"\n\n"; + + my $c = '{'; + $next_value = 0; + print F "enum ${name} "; + foreach my $v (sort { $a <=> $b } keys(%enumvals)) { + my $s = $enumvals{$v}; + print F "$c\n $s"; + print F " = $v" if ($v != $next_value); + $next_value = $v + 1; + $c = ','; + } + print F "\n};\n\n"; + print F "extern const struct perfect_hash ${name}_hash;\n"; + printf F "extern const char * const %s_tbl[%d];\n", $name, $tbllen; + + print F "\nstatic inline enum ${name} ${name}_find(const char *str)\n"; + print F "{\n"; + print F " return perfhash_find(&${name}_hash, str);\n"; + print F "}\n"; + + print F "\nstatic inline const char * ${name}_name(enum ${name} x)\n"; + print F "{\n"; + printf F " size_t ix = (size_t)x - (%d);\n", $tbloffs; + printf F " if (ix >= %d)\n", $tbllen; + print F " return NULL;\n"; + print F " return ${name}_tbl[ix];\n"; + print F "}\n"; + + print F "\nstatic inline const char * ${name}_dname(enum ${name} x)\n"; + print F "{\n"; + print F " const char *y = ${name}_name(x);\n"; + print F " return y ? y : invalid_enum_str(x);\n"; + print F "}\n"; + + print F "\n#endif /* $guard */\n"; +} elsif ($output eq 'c') { + # The strings we hash must all be lower case, even if the string + # table doesn't contain them that way. + + my %lcstrings; + foreach my $s (keys(%strings)) { + my $ls = "\L$s"; + if (defined($lcstrings{$ls})) { + printf STDERR "%s: strings \"%s\" and \"%s\" differ only in case\n", + $infile, $s, $strings{$lcstrings{$s}}; + } else { + $lcstrings{$ls} = $strings{$s} - $tbloffs; + } + } + + my @hashinfo = gen_perfect_hash(\%lcstrings); + if (!@hashinfo) { + die "$0: no hash found\n"; + } + + # Paranoia... + verify_hash_table(\%lcstrings, \@hashinfo); + + my ($n, $sv, $g) = @hashinfo; + + die if ($n & ($n-1)); + + print F "/*\n"; + print F " * This file is generated from $infile\n"; + print F " * by $me; do not edit.\n"; + print F " */\n"; + print F "\n"; + + print F "#include \"$hfile\"\n\n"; + + printf F "const char * const %s_tbl[%d] = ", $name, $tbllen; + my $c = '{'; + for (my $i = $tbloffs; $i < $tbloffs+$tbllen; $i++) { + printf F "%s\n %s", $c, + defined($stringbyval{$i}) ? '"'.$stringbyval{$i}.'"' : 'NULL'; + $c = ','; + } + print F "\n};\n\n"; + + print F "#define UNUSED (65536/3)\n\n"; + + printf F "static const int16_t %s_hashvals[%d] = ", $name, $n*2; + $c = '{'; + for (my $i = 0; $i < $n; $i++) { + my $h = ${$g}[$i*2+0]; + print F "$c\n ", defined($h) ? $h : 'UNUSED'; + $c = ','; + } + for (my $i = 0; $i < $n; $i++) { + my $h = ${$g}[$i*2+1]; + print F "$c\n ", defined($h) ? $h : 'UNUSED'; + $c = ','; + } + print F "\n};\n\n"; + + print F "const struct perfect_hash ${name}_hash = {\n"; + printf F " UINT64_C(0x%08x%08x),\n", $$sv[0], $$sv[1]; # crcinit + printf F " UINT32_C(0x%x),\n", $n-1; # hashmask + printf F " UINT32_C(%u),\n", $tbllen; # tbllen + printf F " %d,\n", $tbloffs; # tbloffs + printf F " (%s),\n", $errval; # errval + printf F " ${name}_hashvals,\n"; # hashvals + printf F " ${name}_tbl\n"; # strings + print F "};\n"; +} |