diff options
author | Karl Williamson <khw@cpan.org> | 2020-06-02 10:11:47 -0600 |
---|---|---|
committer | Karl Williamson <khw@cpan.org> | 2022-06-09 09:08:47 -0600 |
commit | 26df2752a1a36626b649abf5e637c9566e53a143 (patch) | |
tree | e76be8b457c8e409210507a98578f01fcbe9a7b1 /regexec.c | |
parent | 6a4fc003e5b726738d0652fd745146ae267c9f4f (diff) | |
download | perl-26df2752a1a36626b649abf5e637c9566e53a143.tar.gz |
regexec.c: Handle Turkish locale if large ANYOF bitmap
By default, the bitmap for ANYOF nodes covers only the lowest 256
characters, but it is possible to compile Perl so that the bitmap covers up
to 2**16 of them. Prior to this commit, doing so broke Turkish locale handling.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 30 |
1 files changed, 24 insertions, 6 deletions
@@ -10776,19 +10776,37 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const * matches */ if ( UNLIKELY(PL_in_utf8_turkic_locale) && ! match - && (flags & ANYOFL_FOLD) - && utf8_target) + && (flags & ANYOFL_FOLD)) { - if (c == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { - if (ANYOF_BITMAP_TEST(n, 'i')) { + if (utf8_target) { + if (c == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) { + if (ANYOF_BITMAP_TEST(n, 'i')) { + match = TRUE; + } + } + else if (c == LATIN_SMALL_LETTER_DOTLESS_I) { + if (ANYOF_BITMAP_TEST(n, 'I')) { + match = TRUE; + } + } + } + +#if NUM_ANYOF_CODE_POINTS > 256 + /* Larger bitmap means these special cases aren't handled outside + * the bitmap above. */ + if (*p == 'i') { + if (ANYOF_BITMAP_TEST(n, + LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE)) + { match = TRUE; } } - else if (c == LATIN_SMALL_LETTER_DOTLESS_I) { - if (ANYOF_BITMAP_TEST(n, 'I')) { + else if (*p == 'I') { + if (ANYOF_BITMAP_TEST(n, LATIN_SMALL_LETTER_DOTLESS_I)) { match = TRUE; } } +#endif } if (UNICODE_IS_SUPER(c) |