diff options
author | Karl Williamson <khw@cpan.org> | 2018-04-19 23:16:33 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2018-04-20 01:11:54 -0600 |
commit | afde5508f26243045d5c09a28d2f6465c15ad1ce (patch) | |
tree | 980643a7f04f16c446f3f40ccdbd31bed828239b /regen/mk_invlists.pl | |
parent | c2300ef8be26c9fc3c8b6b2c2010436e21614d19 (diff) | |
download | perl-afde5508f26243045d5c09a28d2f6465c15ad1ce.tar.gz |
Use a perfect hash for Unicode property lookups
The previous commits in this series have been preparing to allow the
Devel::Tokenizer::C code to be swapped out for the much smaller perfect
hash code.
Diffstat (limited to 'regen/mk_invlists.pl')
-rw-r--r-- | regen/mk_invlists.pl | 45 |
1 files changed, 11 insertions, 34 deletions
```diff
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index c9e05a8825..4c11d08a1a 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -2758,49 +2758,26 @@ my @sources = qw(regen/mk_invlists.pl
 
 read_only_bottom_close_and_rename($out_fh, \@sources);
 
-use Devel::Tokenizer::C;
+require './regen/mph.pl';
 
 sub token_name
 {
     my $name = sanitize_name(shift);
     warn "$name contains non-word" if $name =~ /\W/a;
 
-    return "return $table_name_prefix\U$name;\n"
+    return "$table_name_prefix\U$name"
 }
 
-my $t = Devel::Tokenizer::C->new(TokenFunc => \&token_name,
-                                 StringLength => 'len',
-                                 Strategy => 'narrow',
-                                 TokenEnd => undef,
-                                 UnknownCode => 'return 0;',
-                                );
-
-$t->add_tokens(lc $_) for @keywords;
-
-my $keywords_fh = open_new('uni_keywords.c', '>',
+my $keywords_fh = open_new('uni_keywords.h', '>',
                   {style => '*', by => 'regen/mk_invlists.pl',
-                  from => "Unicode::UCD"});
-
-print $keywords_fh <<EOF;
-
-#define PERL_IN_UNI_KEYWORDS_C
-
-#include "EXTERN.h"
-#include "perl.h"
-
-int
-Perl_uniprop_lookup(const char * tokstr, const Size_t len)
-{
-
-    PERL_ARGS_ASSERT_UNIPROP_LOOKUP;
-
-EOF
-
-print $keywords_fh $t->generate;
-
-print $keywords_fh <<EOF;
-
+                  from => "mph.pl"});
+my %keyword_hash;
+foreach my $keyword (@keywords) {
+    $keyword_hash{$keyword} = token_name($keyword);
 }
-EOF
+my ($second_level, $seed1, $length_all_keys, $smart_blob, $rows) = MinimalPerfectHash::make_mph_from_hash(\%keyword_hash);
+print $keywords_fh MinimalPerfectHash::make_algo($second_level, $seed1, $length_all_keys, $smart_blob, $rows, undef, undef, undef, 'match_uniprop' );
+
+push @sources, 'regen/mph.pl';
 
 read_only_bottom_close_and_rename($keywords_fh, \@sources);
```