summary | refs | log | tree | commit | diff
path: root/regexec.c
diff options
context:
space:
mode:
author: Karl Williamson <khw@cpan.org> 2020-06-02 10:11:47 -0600
committer: Karl Williamson <khw@cpan.org> 2022-06-09 09:08:47 -0600
commit: 26df2752a1a36626b649abf5e637c9566e53a143 (patch)
tree: e76be8b457c8e409210507a98578f01fcbe9a7b1 /regexec.c
parent: 6a4fc003e5b726738d0652fd745146ae267c9f4f (diff)
download: perl-26df2752a1a36626b649abf5e637c9566e53a143.tar.gz
regexec.c: Handle Turkish locale if large ANYOF bitmap
Perl defaults to the bitmap for ANYOF nodes being for the lowest 256 characters, but it is possible to compile the bitmap to be up to size 2**16. Doing so, prior to this commit, broke Turkish locale handling.
Diffstat (limited to 'regexec.c')
-rw-r--r-- regexec.c 30
1 files changed, 24 insertions, 6 deletions
diff --git a/regexec.c b/regexec.c
index f6c358d44c..8b14549692 100644
--- a/regexec.c
+++ b/regexec.c
@@ -10776,19 +10776,37 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
* matches */
if ( UNLIKELY(PL_in_utf8_turkic_locale)
&& ! match
- && (flags & ANYOFL_FOLD)
- && utf8_target)
+ && (flags & ANYOFL_FOLD))
{
- if (c == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
- if (ANYOF_BITMAP_TEST(n, 'i')) {
+ if (utf8_target) {
+ if (c == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
+ if (ANYOF_BITMAP_TEST(n, 'i')) {
+ match = TRUE;
+ }
+ }
+ else if (c == LATIN_SMALL_LETTER_DOTLESS_I) {
+ if (ANYOF_BITMAP_TEST(n, 'I')) {
+ match = TRUE;
+ }
+ }
+ }
+
+#if NUM_ANYOF_CODE_POINTS > 256
+ /* Larger bitmap means these special cases aren't handled outside
+ * the bitmap above. */
+ if (*p == 'i') {
+ if (ANYOF_BITMAP_TEST(n,
+ LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE))
+ {
match = TRUE;
}
}
- else if (c == LATIN_SMALL_LETTER_DOTLESS_I) {
- if (ANYOF_BITMAP_TEST(n, 'I')) {
+ else if (*p == 'I') {
+ if (ANYOF_BITMAP_TEST(n, LATIN_SMALL_LETTER_DOTLESS_I)) {
match = TRUE;
}
}
+#endif
}
if (UNICODE_IS_SUPER(c)