diff options
author | Yves Orton <demerphq@gmail.com> | 2022-04-19 07:54:55 +0200 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2022-04-19 05:41:19 -0600 |
commit | 4948e2deedbb0f9ed5b48e7a32e737b829a1cafe (patch) | |
tree | f8354815b0fcc6135457b686641665236f29f6c8 | |
parent | eda35008b17e739922da4577bba648b73b8fbefc (diff) | |
download | perl-4948e2deedbb0f9ed5b48e7a32e737b829a1cafe.tar.gz |
regen/mph.pl - add a validation step to build_split_words()
Exercise an abundance of caution and validate that the buffer and split
point data returned is fit for purpose.
Includes the output of running regen/mk_invlists.pl.
-rw-r--r-- | regen/mph.pl | 21 | ||||
-rw-r--r-- | uni_keywords.h | 2 |
2 files changed, 18 insertions, 5 deletions
diff --git a/regen/mph.pl b/regen/mph.pl index 88b36e4967..55c7e5f4c5 100644 --- a/regen/mph.pl +++ b/regen/mph.pl @@ -903,12 +903,25 @@ sub build_split_words { # _squeeze algorithm, although it uses less memory and will likely # be faster, especially if randomization is enabled. The default # is to use _squeeze as our hash is not that large (~8k keys). - + my ($buf, $split_words); if ($self->{simple_split}) { - return $self->build_split_words_simple(); + ($buf, $split_words)= $self->build_split_words_simple(); } - - return $self->build_split_words_squeeze(); + else { + ($buf, $split_words)= $self->build_split_words_squeeze(); + } + foreach my $key (sort keys %$split_words) { + my $point= $split_words->{$key}; + my $prefix= substr($key, 0, $point); + my $suffix= substr($key, $point); + if (index($buf, $prefix) < 0) { + die "Failed to find prefix '$prefix' for '$key'"; + } + if (length $suffix and index($buf, $suffix) < 0) { + die "Failed to find suffix '$suffix' for '$key'"; + } + } + return ($buf, $split_words); } sub blob_as_code { diff --git a/uni_keywords.h b/uni_keywords.h index 0e50990a9a..45b78e2b39 100644 --- a/uni_keywords.h +++ b/uni_keywords.h @@ -7681,5 +7681,5 @@ match_uniprop( const unsigned char * const key, const U16 key_len ) { * 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl * 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl * 1c73795f9150bd556573e7ae982789377289e22b6a7f3db0a05c36852e8d749f regen/mk_invlists.pl - * c71eddd9ff61cf69040e9ab7f5711415fc28e5d41003af2479e4aaddbadbd0aa regen/mph.pl + * 6e843ca664e002c0d05693c02281ee61f8e7900edba54981dfba549027de37b0 regen/mph.pl * ex: set ro: */ |