summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- regcomp.c 27
-rw-r--r-- t/re/re_tests 12
2 files changed, 24 insertions, 15 deletions
diff --git a/regcomp.c b/regcomp.c
index 6ecd0bf1bd..330d39b421 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -727,6 +727,9 @@ S_cl_anything(struct regnode_charclass_class *cl)
ANYOF_BITMAP_SETALL(cl);
ANYOF_CLASS_ZERO(cl); /* all bits set, so class is irrelevant */
cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL|ANYOF_LOC_NONBITMAP_FOLD|ANYOF_NON_UTF8_LATIN1_ALL|ANYOF_LOCALE;
+ /* The above sets ANYOF_LOCALE, which, given the current logic, may not get
+ * cleared even if no locale is in the regex, which may lead to false
+ * positives; see the commit message */
}
/* Can match anything (initialization) */
@@ -3240,6 +3243,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
data->start_class->flags &= ~ANYOF_EOS;
data->start_class->flags |= ANYOF_LOC_NONBITMAP_FOLD;
if (OP(scan) == EXACTFL) {
+ /* XXX Setting this flag here is probably no longer
+ * necessary, and probably wrong, as LOCALE is now on in
+ * the initial state */
data->start_class->flags |= ANYOF_LOCALE;
}
else {
@@ -3752,7 +3758,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
else {
if (data->start_class->flags & ANYOF_LOCALE)
ANYOF_CLASS_SET(data->start_class,ANYOF_ALNUM);
- else if (OP(scan) == ALNUMU) {
+
+ /* Even if under locale, set the bits for non-locale
+ * in case it isn't a true locale-node. This will
+ * create false positives if it truly is locale */
+ if (OP(scan) == ALNUMU) {
for (value = 0; value < 256; value++) {
if (isWORDCHAR_L1(value)) {
ANYOF_BITMAP_SET(data->start_class, value);
@@ -3789,7 +3799,11 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
else {
if (data->start_class->flags & ANYOF_LOCALE)
ANYOF_CLASS_SET(data->start_class,ANYOF_NALNUM);
- else {
+
+ /* Even if under locale, set the bits for
+ * non-locale in case it isn't a true locale-node.
+ * This will create false positives if it truly is
+ * locale */
if (OP(scan) == NALNUMU) {
for (value = 0; value < 256; value++) {
if (! isWORDCHAR_L1(value)) {
@@ -3803,7 +3817,6 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
}
}
}
- }
}
break;
case SPACE:
@@ -3829,7 +3842,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
if (data->start_class->flags & ANYOF_LOCALE) {
ANYOF_CLASS_SET(data->start_class,ANYOF_SPACE);
}
- else if (OP(scan) == SPACEU) {
+ if (OP(scan) == SPACEU) {
for (value = 0; value < 256; value++) {
if (isSPACE_L1(value)) {
ANYOF_BITMAP_SET(data->start_class, value);
@@ -3866,7 +3879,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
else {
if (data->start_class->flags & ANYOF_LOCALE)
ANYOF_CLASS_SET(data->start_class,ANYOF_NSPACE);
- else if (OP(scan) == NSPACEU) {
+ if (OP(scan) == NSPACEU) {
for (value = 0; value < 256; value++) {
if (!isSPACE_L1(value)) {
ANYOF_BITMAP_SET(data->start_class, value);
@@ -3894,11 +3907,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
else {
if (data->start_class->flags & ANYOF_LOCALE)
ANYOF_CLASS_SET(data->start_class,ANYOF_DIGIT);
- else {
for (value = 0; value < 256; value++)
if (isDIGIT(value))
ANYOF_BITMAP_SET(data->start_class, value);
- }
}
break;
case NDIGIT:
@@ -3912,11 +3923,9 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp,
else {
if (data->start_class->flags & ANYOF_LOCALE)
ANYOF_CLASS_SET(data->start_class,ANYOF_NDIGIT);
- else {
for (value = 0; value < 256; value++)
if (!isDIGIT(value))
ANYOF_BITMAP_SET(data->start_class, value);
- }
}
break;
CASE_SYNST_FNC(VERTWS);
diff --git a/t/re/re_tests b/t/re/re_tests
index 0f19ae21d1..b3815298bb 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1498,16 +1498,16 @@ abc\N{def - c - \\N{NAME} must be resolved by the lexer
(?{})[\x{100}] \x{100} y $& \x{100}
# RT #85964
-^m?(\S)(.*)\1$ aba Ty $1 a
+^m?(\S)(.*)\1$ aba y $1 a
^m?(\S)(.*)\1$ \tb\t n - -
-^m?(\s)(.*)\1$ \tb\t Ty $1 \t
+^m?(\s)(.*)\1$ \tb\t y $1 \t
^m?(\s)(.*)\1$ aba n - -
-^m?(\W)(.*)\1$ :b: Ty $1 :
+^m?(\W)(.*)\1$ :b: y $1 :
^m?(\W)(.*)\1$ aba n - -
-^m?(\w)(.*)\1$ aba Ty $1 a
+^m?(\w)(.*)\1$ aba y $1 a
^m?(\w)(.*)\1$ :b: n - -
-^m?(\D)(.*)\1$ aba Ty $1 a
+^m?(\D)(.*)\1$ aba y $1 a
^m?(\D)(.*)\1$ 5b5 n - -
-^m?(\d)(.*)\1$ 5b5 Ty $1 5
+^m?(\d)(.*)\1$ 5b5 y $1 5
^m?(\d)(.*)\1$ aba n - -
# vim: softtabstop=0 noexpandtab