diff options
author | Karl Williamson <khw@cpan.org> | 2020-11-12 14:40:19 -0700 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2020-11-18 09:50:32 -0700 |
commit | 3dcca105f68f9a3c4474da8390e439dd6bc34a74 (patch) | |
tree | c2289b456a393cfa3b0a3aef0ea47f6a9ca53fe0 | |
parent | bcf3564c67eb142f6b534cb04acdf34604567910 (diff) | |
download | perl-3dcca105f68f9a3c4474da8390e439dd6bc34a74.tar.gz |
re/fold_grind.pl: Test a couple more code points
These add tests for checking that the revised folding in a future commit
works in some edge cases that previously weren't an issue.
-rw-r--r-- | t/re/fold_grind.pl | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/t/re/fold_grind.pl b/t/re/fold_grind.pl
index fb0d3620e8..a5ae6fd0fd 100644
--- a/t/re/fold_grind.pl
+++ b/t/re/fold_grind.pl
@@ -45,9 +45,23 @@ if ($charset eq 'T') {
 # Special-cased characters in the .c's that we want to make sure get tested.
 my %be_sure_to_test = (
         chr utf8::unicode_to_native(0xDF) => 1, # LATIN_SMALL_LETTER_SHARP_S
-        "\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S
+
+        # This is included because the uppercase occupies more bytes, but the
+        # first two bytes of their representations differ only in one bit,
+        # that could lead the code looking for shortcuts astray; you can't do
+        # certain shortcuts if the lengths differ
+        "\x{29E}" => 1, # LATIN SMALL LETTER TURNED K
+
         "\x{390}" => 1, # GREEK_SMALL_LETTER_IOTA_WITH_DIALYTIKA_AND_TONOS
         "\x{3B0}" => 1, # GREEK_SMALL_LETTER_UPSILON_WITH_DIALYTIKA_AND_TONOS
+
+        # This is included because the uppercase and lowercase differ by only
+        # a single bit and it is in the first of the two byte representations.
+        # This showed that a previous way was erroneous of calculating if
+        # initial substrings were closely-related bit-wise.
+        "\x{3CC}" => 1, # GREEK SMALL LETTER OMICRON WITH TONOS
+
+        "\x{1E9E}" => 1, # LATIN_CAPITAL_LETTER_SHARP_S
         "\x{1FD3}" => 1, # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
         "\x{1FE3}" => 1, # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
         "I" => 1,