summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--regcomp.c37
-rw-r--r--regcomp.h21
-rw-r--r--regexec.c8
3 files changed, 48 insertions, 18 deletions
diff --git a/regcomp.c b/regcomp.c
index 5c5cda9519..44fb035dcf 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1177,7 +1177,9 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
/* If this can match all upper Latin1 code points, have to add them
* as well */
- if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII) {
+ if (OP(node) == ANYOFD
+ && (ANYOF_FLAGS(node) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
+ {
_invlist_union(invlist, PL_UpperLatin1, &invlist);
}
@@ -1255,12 +1257,19 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
* that should be; while the consequence of having /l bugs is
* incorrect matches */
if (ssc_is_anything((regnode_ssc *)and_with)) {
- anded_flags |= ANYOF_WARN_SUPER;
+ anded_flags |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
}
}
else {
anded_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, and_with);
- anded_flags = ANYOF_FLAGS(and_with) & ANYOF_COMMON_FLAGS;
+ if (OP(and_with) == ANYOFD) {
+ anded_flags = ANYOF_FLAGS(and_with) & ANYOF_COMMON_FLAGS;
+ }
+ else {
+ anded_flags = ANYOF_FLAGS(and_with)
+ &( ANYOF_COMMON_FLAGS
+ |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER);
+ }
}
ANYOF_FLAGS(ssc) &= anded_flags;
@@ -1411,6 +1420,11 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
else {
ored_cp_list = get_ANYOF_cp_list_for_ssc(pRExC_state, or_with);
ored_flags = ANYOF_FLAGS(or_with) & ANYOF_COMMON_FLAGS;
+ if (OP(or_with) != ANYOFD) {
+ ored_flags
+ |= ANYOF_FLAGS(or_with)
+ & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
+ }
}
ANYOF_FLAGS(ssc) |= ored_flags;
@@ -1609,7 +1623,9 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, regnode_ssc *ssc)
/* The code in this file assumes that all but these flags aren't relevant
* to the SSC, except SSC_MATCHES_EMPTY_STRING, which should be cleared
* by the time we reach here */
- assert(! (ANYOF_FLAGS(ssc) & ~ANYOF_COMMON_FLAGS));
+ assert(! (ANYOF_FLAGS(ssc)
+ & ~( ANYOF_COMMON_FLAGS
+ |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)));
populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
@@ -15684,7 +15700,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
if (DEPENDS_SEMANTICS) {
/* Under /d, everything in the upper half of the Latin1 range
* matches these complements */
- ANYOF_FLAGS(ret) |= ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII;
+ ANYOF_FLAGS(ret) |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
}
else if (AT_LEAST_ASCII_RESTRICTED) {
/* Under /a and /aa, everything above ASCII matches these
@@ -15771,7 +15787,8 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
}
if (warn_super) {
- ANYOF_FLAGS(ret) |= ANYOF_WARN_SUPER;
+ ANYOF_FLAGS(ret)
+ |= ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER;
}
}
@@ -15866,7 +15883,9 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
/* We don't optimize if we are supposed to make sure all non-Unicode
* code points raise a warning, as only ANYOF nodes have this check.
* */
- && ! ((ANYOF_FLAGS(ret) & ANYOF_WARN_SUPER) && ALWAYS_WARN_SUPER))
+ && ! ((ANYOF_FLAGS(ret) & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
+ && OP(ret) != ANYOFD
+ && ALWAYS_WARN_SUPER))
{
UV start, end;
U8 op = END; /* The optimization node-type */
@@ -17052,7 +17071,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
sv_catpvs(sv, "^");
}
- if (flags & ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII) {
+ if (OP(o) == ANYOFD
+ && (flags & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER))
+ {
sv_catpvs(sv, "{non-utf8-latin1-all}");
}
diff --git a/regcomp.h b/regcomp.h
index 897d35b7d8..7e43908e30 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -378,7 +378,7 @@ struct regnode_ssc {
* reach this high). */
#define ANYOF_ONLY_HAS_BITMAP ((U32) -1)
-/* Flags for node->flags of ANYOF. These are in short supply, with none
+/* Flags for node->flags of ANYOF. These are in short supply, with one
* currently available. The ABOVE_BITMAP_ALL bit could be freed up
* by resorting to creating a swash containing everything above 255. This
* introduces a performance penalty. An option that wouldn't slow things down
@@ -426,9 +426,6 @@ struct regnode_ssc {
* at compile-time */
#define ANYOF_MATCHES_POSIXL 0x08
-/* Should we raise a warning if matching against an above-Unicode code point?
- * */
-#define ANYOF_WARN_SUPER 0x10
/* Can match something outside the bitmap that isn't in utf8 */
#define ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES 0x20
@@ -436,9 +433,17 @@ struct regnode_ssc {
/* Matches every code point NUM_ANYOF_CODE_POINTS and above*/
#define ANYOF_MATCHES_ALL_ABOVE_BITMAP 0x40
-/* Match all Latin1 characters that aren't ASCII when the target string is not
- * in utf8. */
-#define ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII 0x80
+
+/* Shared bit:
+ * Under /d it means the ANYOF node matches all non-ASCII Latin1
+ * characters when the target string is not in utf8.
+ * When not under /d, it means the ANYOF node should raise a warning if
+ * matching against an above-Unicode code point.
+ * (These uses are mutually exclusive because the warning requires a \p{}, and
+ * \p{} implies /u which deselects /d). An SSC node only has this bit set if
+ * what is meant is the warning. The long macro name is to make sure that you
+ * are cautioned about its shared nature */
+#define ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER 0x80
#define ANYOF_FLAGS_ALL (0xff)
@@ -447,7 +452,7 @@ struct regnode_ssc {
/* These are the flags that apply to both regular ANYOF nodes and synthetic
* start class nodes during construction of the SSC. During finalization of
* the SSC, others of the flags could be added to it */
-#define ANYOF_COMMON_FLAGS (ANYOF_WARN_SUPER|ANYOF_HAS_UTF8_NONBITMAP_MATCHES)
+#define ANYOF_COMMON_FLAGS (ANYOF_HAS_UTF8_NONBITMAP_MATCHES)
/* Character classes for node->classflags of ANYOF */
/* Should be synchronized with a table in regprop() */
diff --git a/regexec.c b/regexec.c
index 4aa80fb074..78ad2bcada 100644
--- a/regexec.c
+++ b/regexec.c
@@ -8598,7 +8598,9 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
if (c < NUM_ANYOF_CODE_POINTS) {
if (ANYOF_BITMAP_TEST(n, c))
match = TRUE;
- else if ((flags & ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII)
+ else if ((flags
+ & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
+ && OP(n) == ANYOFD
&& ! utf8_target
&& ! isASCII(c))
{
@@ -8701,7 +8703,9 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
}
if (UNICODE_IS_SUPER(c)
- && (flags & ANYOF_WARN_SUPER)
+ && (flags
+ & ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER)
+ && OP(n) != ANYOFD
&& ckWARN_d(WARN_NON_UNICODE))
{
Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),