summary | refs | log | tree | commit | diff
path: root/regen/mk_invlists.pl
diff options
context:
space:
mode:
author Karl Williamson <khw@cpan.org> 2018-04-19 23:16:33 -0600
committer Karl Williamson <khw@cpan.org> 2018-04-20 01:11:54 -0600
commit afde5508f26243045d5c09a28d2f6465c15ad1ce (patch)
tree 980643a7f04f16c446f3f40ccdbd31bed828239b /regen/mk_invlists.pl
parent c2300ef8be26c9fc3c8b6b2c2010436e21614d19 (diff)
download perl-afde5508f26243045d5c09a28d2f6465c15ad1ce.tar.gz
Use a perfect hash for Unicode property lookups
The previous commits in this series have been preparing to allow the Devel::Tokenizer::C code to be swapped out for the much smaller perfect hash code.
Diffstat (limited to 'regen/mk_invlists.pl')
-rw-r--r-- regen/mk_invlists.pl 45
1 files changed, 11 insertions, 34 deletions
diff --git a/regen/mk_invlists.pl b/regen/mk_invlists.pl
index c9e05a8825..4c11d08a1a 100644
--- a/regen/mk_invlists.pl
+++ b/regen/mk_invlists.pl
@@ -2758,49 +2758,26 @@ my @sources = qw(regen/mk_invlists.pl
read_only_bottom_close_and_rename($out_fh, \@sources);
-use Devel::Tokenizer::C;
+require './regen/mph.pl';
sub token_name
{
my $name = sanitize_name(shift);
warn "$name contains non-word" if $name =~ /\W/a;
- return "return $table_name_prefix\U$name;\n"
+ return "$table_name_prefix\U$name"
}
-my $t = Devel::Tokenizer::C->new(TokenFunc => \&token_name,
- StringLength => 'len',
- Strategy => 'narrow',
- TokenEnd => undef,
- UnknownCode => 'return 0;',
- );
-
-$t->add_tokens(lc $_) for @keywords;
-
-my $keywords_fh = open_new('uni_keywords.c', '>',
+my $keywords_fh = open_new('uni_keywords.h', '>',
{style => '*', by => 'regen/mk_invlists.pl',
- from => "Unicode::UCD"});
-
-print $keywords_fh <<EOF;
-
-#define PERL_IN_UNI_KEYWORDS_C
-
-#include "EXTERN.h"
-#include "perl.h"
-
-int
-Perl_uniprop_lookup(const char * tokstr, const Size_t len)
-{
-
- PERL_ARGS_ASSERT_UNIPROP_LOOKUP;
-
-EOF
-
-print $keywords_fh $t->generate;
-
-print $keywords_fh <<EOF;
-
+ from => "mph.pl"});
+my %keyword_hash;
+foreach my $keyword (@keywords) {
+ $keyword_hash{$keyword} = token_name($keyword);
}
-EOF
+my ($second_level, $seed1, $length_all_keys, $smart_blob, $rows) = MinimalPerfectHash::make_mph_from_hash(\%keyword_hash);
+print $keywords_fh MinimalPerfectHash::make_algo($second_level, $seed1, $length_all_keys, $smart_blob, $rows, undef, undef, undef, 'match_uniprop' );
+
+push @sources, 'regen/mph.pl';
read_only_bottom_close_and_rename($keywords_fh, \@sources);