summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYves Orton <demerphq@gmail.com>2022-04-19 07:54:55 +0200
committerKarl Williamson <khw@cpan.org>2022-04-19 05:41:19 -0600
commit4948e2deedbb0f9ed5b48e7a32e737b829a1cafe (patch)
treef8354815b0fcc6135457b686641665236f29f6c8
parenteda35008b17e739922da4577bba648b73b8fbefc (diff)
downloadperl-4948e2deedbb0f9ed5b48e7a32e737b829a1cafe.tar.gz
regen/mph.pl - add a validation step to build_split_words()
Exercise an abundance of caution and validate that the buffer and split point data returned is fit for purpose. Includes the output of running regen/mk_invlists.pl.
-rw-r--r--regen/mph.pl21
-rw-r--r--uni_keywords.h2
2 files changed, 18 insertions, 5 deletions
diff --git a/regen/mph.pl b/regen/mph.pl
index 88b36e4967..55c7e5f4c5 100644
--- a/regen/mph.pl
+++ b/regen/mph.pl
@@ -903,12 +903,25 @@ sub build_split_words {
# _squeeze algorithm, although it uses less memory and will likely
# be faster, especially if randomization is enabled. The default
# is to use _squeeze as our hash is not that large (~8k keys).
-
+ my ($buf, $split_words);
if ($self->{simple_split}) {
- return $self->build_split_words_simple();
+ ($buf, $split_words)= $self->build_split_words_simple();
}
-
- return $self->build_split_words_squeeze();
+ else {
+ ($buf, $split_words)= $self->build_split_words_squeeze();
+ }
+ foreach my $key (sort keys %$split_words) {
+ my $point= $split_words->{$key};
+ my $prefix= substr($key, 0, $point);
+ my $suffix= substr($key, $point);
+ if (index($buf, $prefix) < 0) {
+ die "Failed to find prefix '$prefix' for '$key'";
+ }
+ if (length $suffix and index($buf, $suffix) < 0) {
+ die "Failed to find suffix '$suffix' for '$key'";
+ }
+ }
+ return ($buf, $split_words);
}
sub blob_as_code {
diff --git a/uni_keywords.h b/uni_keywords.h
index 0e50990a9a..45b78e2b39 100644
--- a/uni_keywords.h
+++ b/uni_keywords.h
@@ -7681,5 +7681,5 @@ match_uniprop( const unsigned char * const key, const U16 key_len ) {
* 0a6b5ab33bb1026531f816efe81aea1a8ffcd34a27cbea37dd6a70a63d73c844 regen/charset_translations.pl
* 5f8520d3a17ade6317fc0c423f5091470924b1ef425bca0c41ce8e4a9f8460fe regen/mk_PL_charclass.pl
* 1c73795f9150bd556573e7ae982789377289e22b6a7f3db0a05c36852e8d749f regen/mk_invlists.pl
- * c71eddd9ff61cf69040e9ab7f5711415fc28e5d41003af2479e4aaddbadbd0aa regen/mph.pl
+ * 6e843ca664e002c0d05693c02281ee61f8e7900edba54981dfba549027de37b0 regen/mph.pl
* ex: set ro: */