summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
authorKarl Williamson <khw@cpan.org>2015-01-19 12:47:41 -0700
committerKarl Williamson <khw@cpan.org>2015-01-20 10:38:03 -0700
commitb927b7e95b7031e0f55821c537e194ad78fd3a09 (patch)
tree3e7a60c4a058774bc9d7c0faf3bb869624863010 /regcomp.c
parentdfa1e3a5359a09e2f8ad21051412248182399696 (diff)
downloadperl-b927b7e95b7031e0f55821c537e194ad78fd3a09.tar.gz
Add portability warning for re 'strict'
When a range in a bracketed character class has one end specified as Unicode, the whole range is viewed as Unicode. Currently this is not warned about, though it is somewhat like mixing apples and oranges. This commit adds a warning, but only under "use re 'strict'", and it now documents the behavior when only one end of the range is Unicode.
Diffstat (limited to 'regcomp.c')
-rw-r--r--regcomp.c33
1 files changed, 19 insertions, 14 deletions
diff --git a/regcomp.c b/regcomp.c
index 6e0daba7c2..7d5c50ca18 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -641,6 +641,12 @@ static const scan_data_t zero_scan_data =
REPORT_LOCATION_ARGS(offset)); \
} STMT_END
+#define vWARN(loc, m) STMT_START { \
+ const IV offset = loc - RExC_precomp; \
+ __ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_REGEXP), m REPORT_LOCATION, \
+ REPORT_LOCATION_ARGS(offset)); \
+} STMT_END
+
#define vWARN_dep(loc, m) STMT_START { \
const IV offset = loc - RExC_precomp; \
__ASSERT_(PASS2) Perl_warner(aTHX_ packWARN(WARN_DEPRECATED), m REPORT_LOCATION, \
@@ -13810,7 +13816,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* runtime locale is UTF-8 */
SV* only_utf8_locale_list = NULL;
-#ifdef EBCDIC
/* In a range, if one of the endpoints is non-character-set portable,
* meaning that it hard-codes a code point that may mean a different
* character in ASCII vs. EBCDIC, as opposed to, say, a literal 'A' or a
@@ -13822,7 +13827,6 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
* to Unicode (i.e. non-ASCII), each code point in it should be considered
* to be a Unicode value. */
bool unicode_range = FALSE;
-#endif
bool invert = FALSE; /* Is this class to be complemented */
bool warn_super = ALWAYS_WARN_SUPER;
@@ -13926,9 +13930,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
if (!range) {
rangebegin = RExC_parse;
element_count++;
-#ifdef EBCDIC
non_portable_endpoint = 0;
-#endif
}
if (UTF) {
value = utf8n_to_uvchr((U8*)RExC_parse,
@@ -14027,10 +14029,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
prevvalue = save_prevvalue;
continue; /* Back to top of loop to get next char */
}
+
/* Here, is a single code point, and <value> contains it */
-#ifdef EBCDIC
unicode_range = TRUE; /* \N{} are Unicode */
-#endif
}
break;
case 'p':
@@ -14228,9 +14229,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
vFAIL(error_msg);
}
}
-#ifdef EBCDIC
non_portable_endpoint++;
-#endif
if (IN_ENCODING && value < 0x100) {
goto recode_encoding;
}
@@ -14250,17 +14249,13 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
vFAIL(error_msg);
}
}
-#ifdef EBCDIC
non_portable_endpoint++;
-#endif
if (IN_ENCODING && value < 0x100)
goto recode_encoding;
break;
case 'c':
value = grok_bslash_c(*RExC_parse++, PASS2);
-#ifdef EBCDIC
non_portable_endpoint++;
-#endif
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7':
@@ -14288,9 +14283,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
(void)ReREFCNT_inc(RExC_rx_sv);
}
}
-#ifdef EBCDIC
non_portable_endpoint++;
-#endif
if (IN_ENCODING && value < 0x100)
goto recode_encoding;
break;
@@ -14662,6 +14655,18 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
}
+ if (strict && PASS2 && ckWARN(WARN_REGEXP)) {
+ if (range) {
+
+ /* If the range starts above 255, everything is portable and
+ * likely to be so for any foreseeable character set, so don't
+ * warn. */
+ if (unicode_range && non_portable_endpoint && prevvalue < 256) {
+ vWARN(RExC_parse, "Both or neither range ends should be Unicode");
+ }
+ }
+ }
+
/* Deal with this element of the class */
if (! SIZE_ONLY) {
#ifndef EBCDIC