summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Karl Williamson <public@khwilliamson.com> 2011-10-27 09:39:11 -0600
committer Karl Williamson <public@khwilliamson.com> 2011-10-27 10:56:06 -0600
commit 4f03b4b68c373d6b483f2a44808498ec2c2bf9f7 (patch)
tree 16be33e98afdfd5b58a4cca6ccf14cb36d809134
parent 9ca75586d6d424bf8518f247fc8a1ebeb6e38b51 (diff)
download perl-4f03b4b68c373d6b483f2a44808498ec2c2bf9f7.tar.gz
PATCH: [perl #101970] /[[:lower:]]/i matches upper case
This bug is a regression in 5.14, in which /[[:lower:]]/i and /[[:upper:]]/i no longer matched the opposite case. The fix is to have these classes use a different table under /i matching, one that includes the correct /i code points. These tables were already available, but unused.
-rw-r--r-- regcomp.c 49
-rw-r--r-- t/re/re_tests 4
2 files changed, 35 insertions, 18 deletions
diff --git a/regcomp.c b/regcomp.c
index 0202be8e74..ba48a76a13 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -9600,7 +9600,7 @@ S_checkposixcc(pTHX_ RExC_state_t *pRExC_state)
}
}
-/* No locale test, and always Unicode semantics */
+/* No locale test, and always Unicode semantics, no ignore-case differences */
#define _C_C_T_NOLOC_(NAME,TEST,WORD) \
ANYOF_##NAME: \
for (value = 0; value < 256; value++) \
@@ -9620,8 +9620,11 @@ case ANYOF_N##NAME: \
/* Like the above, but there are differences if we are in uni-8-bit or not, so
* there are two tests passed in, to use depending on that. There aren't any
* cases where the label is different from the name, so no need for that
- * parameter */
-#define _C_C_T_(NAME, TEST_8, TEST_7, WORD) \
+ * parameter.
+ * Sets 'what' to WORD, which is the property name for non-bitmap code points;
+ * but uses FOLD_WORD instead if /i has been selected, to allow a different
+ * property name */
+#define _C_C_T_(NAME, TEST_8, TEST_7, WORD, FOLD_WORD) \
ANYOF_##NAME: \
if (LOC) ANYOF_CLASS_SET(ret, ANYOF_##NAME); \
else if (UNI_SEMANTICS) { \
@@ -9638,7 +9641,12 @@ ANYOF_##NAME: \
} \
} \
yesno = '+'; \
- what = WORD; \
+ if (FOLD) { \
+ what = FOLD_WORD; \
+ } \
+ else { \
+ what = WORD; \
+ } \
break; \
case ANYOF_N##NAME: \
if (LOC) ANYOF_CLASS_SET(ret, ANYOF_N##NAME); \
@@ -9670,7 +9678,12 @@ case ANYOF_N##NAME: \
} \
} \
yesno = '!'; \
- what = WORD; \
+ if (FOLD) { \
+ what = FOLD_WORD; \
+ } \
+ else { \
+ what = WORD; \
+ } \
break
STATIC U8
@@ -10228,20 +10241,20 @@ parseit:
* --jhi */
switch ((I32)namedclass) {
- case _C_C_T_(ALNUMC, isALNUMC_L1, isALNUMC, "XPosixAlnum");
- case _C_C_T_(ALPHA, isALPHA_L1, isALPHA, "XPosixAlpha");
- case _C_C_T_(BLANK, isBLANK_L1, isBLANK, "XPosixBlank");
- case _C_C_T_(CNTRL, isCNTRL_L1, isCNTRL, "XPosixCntrl");
- case _C_C_T_(GRAPH, isGRAPH_L1, isGRAPH, "XPosixGraph");
- case _C_C_T_(LOWER, isLOWER_L1, isLOWER, "XPosixLower");
- case _C_C_T_(PRINT, isPRINT_L1, isPRINT, "XPosixPrint");
- case _C_C_T_(PSXSPC, isPSXSPC_L1, isPSXSPC, "XPosixSpace");
- case _C_C_T_(PUNCT, isPUNCT_L1, isPUNCT, "XPosixPunct");
- case _C_C_T_(UPPER, isUPPER_L1, isUPPER, "XPosixUpper");
+ case _C_C_T_(ALNUMC, isALNUMC_L1, isALNUMC, "XPosixAlnum", "XPosixAlnum");
+ case _C_C_T_(ALPHA, isALPHA_L1, isALPHA, "XPosixAlpha", "XPosixAlpha");
+ case _C_C_T_(BLANK, isBLANK_L1, isBLANK, "XPosixBlank", "XPosixBlank");
+ case _C_C_T_(CNTRL, isCNTRL_L1, isCNTRL, "XPosixCntrl", "XPosixCntrl");
+ case _C_C_T_(GRAPH, isGRAPH_L1, isGRAPH, "XPosixGraph", "XPosixGraph");
+ case _C_C_T_(LOWER, isLOWER_L1, isLOWER, "XPosixLower", "__XPosixLower_i");
+ case _C_C_T_(PRINT, isPRINT_L1, isPRINT, "XPosixPrint", "XPosixPrint");
+ case _C_C_T_(PSXSPC, isPSXSPC_L1, isPSXSPC, "XPosixSpace", "XPosixSpace");
+ case _C_C_T_(PUNCT, isPUNCT_L1, isPUNCT, "XPosixPunct", "XPosixPunct");
+ case _C_C_T_(UPPER, isUPPER_L1, isUPPER, "XPosixUpper", "__XPosixUpper_i");
/* \s, \w match all unicode if utf8. */
- case _C_C_T_(SPACE, isSPACE_L1, isSPACE, "SpacePerl");
- case _C_C_T_(ALNUM, isWORDCHAR_L1, isALNUM, "Word");
- case _C_C_T_(XDIGIT, isXDIGIT_L1, isXDIGIT, "XPosixXDigit");
+ case _C_C_T_(SPACE, isSPACE_L1, isSPACE, "SpacePerl", "SpacePerl");
+ case _C_C_T_(ALNUM, isWORDCHAR_L1, isALNUM, "Word", "Word");
+ case _C_C_T_(XDIGIT, isXDIGIT_L1, isXDIGIT, "XPosixXDigit", "XPosixXDigit");
case _C_C_T_NOLOC_(VERTWS, is_VERTWS_latin1(&value), "VertSpace");
case _C_C_T_NOLOC_(HORIZWS, is_HORIZWS_latin1(&value), "HorizSpace");
case ANYOF_ASCII:
diff --git a/t/re/re_tests b/t/re/re_tests
index 7b303c8755..e89e0ac6c8 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1551,4 +1551,8 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
/ffiffl/i abcdef\x{FB03}\x{FB04} y $& \x{FB03}\x{FB04}
/\xdf\xdf/ui abcdefssss y $& ssss
+# [perl #101970]
+/[[:lower:]]/i \x{100} y $& \x{100}
+/[[:upper:]]/i \x{101} y $& \x{101}
+
# vim: softtabstop=0 noexpandtab