summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--  regcomp.c  22
-rw-r--r--  regcomp.h  5
-rw-r--r--  regexec.c  4
-rw-r--r--  utf8.h  2
4 files changed, 18 insertions, 15 deletions
diff --git a/regcomp.c b/regcomp.c
index 26d480f16b..d3eebdc3b8 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -783,18 +783,18 @@ S_cl_and(struct regnode_charclass_class *cl,
if (!(and_with->flags & ANYOF_FOLD))
cl->flags &= ~ANYOF_FOLD;
- if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE &&
+ if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP &&
!(and_with->flags & ANYOF_INVERT)) {
cl->flags &= ~ANYOF_UNICODE_ALL;
- cl->flags |= ANYOF_UNICODE;
+ cl->flags |= ANYOF_NONBITMAP;
ARG_SET(cl, ARG(and_with));
}
if (!(and_with->flags & ANYOF_UNICODE_ALL) &&
!(and_with->flags & ANYOF_INVERT))
cl->flags &= ~ANYOF_UNICODE_ALL;
- if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) &&
+ if (!(and_with->flags & (ANYOF_NONBITMAP|ANYOF_UNICODE_ALL)) &&
!(and_with->flags & ANYOF_INVERT))
- cl->flags &= ~ANYOF_UNICODE;
+ cl->flags &= ~ANYOF_NONBITMAP;
}
/* 'OR' a given class with another one. Can create false positives */
@@ -851,14 +851,14 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con
if (or_with->flags & ANYOF_FOLD)
cl->flags |= ANYOF_FOLD;
- if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE &&
+ if (cl->flags & ANYOF_NONBITMAP && or_with->flags & ANYOF_NONBITMAP &&
ARG(cl) != ARG(or_with)) {
cl->flags |= ANYOF_UNICODE_ALL;
- cl->flags &= ~ANYOF_UNICODE;
+ cl->flags &= ~ANYOF_NONBITMAP;
}
if (or_with->flags & ANYOF_UNICODE_ALL) {
cl->flags |= ANYOF_UNICODE_ALL;
- cl->flags &= ~ANYOF_UNICODE;
+ cl->flags &= ~ANYOF_NONBITMAP;
}
}
@@ -8317,7 +8317,7 @@ parseit:
(value=='p' ? '+' : '!'), (int)n, RExC_parse);
}
RExC_parse = e + 1;
- ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+ ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP;
namedclass = ANYOF_MAX; /* no official name, but it's named */
}
break;
@@ -8441,7 +8441,7 @@ parseit:
ANYOF_BITMAP_SET(ret, '-');
}
else {
- ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+ ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP;
Perl_sv_catpvf(aTHX_ listsv,
"%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-');
}
@@ -8631,7 +8631,7 @@ parseit:
const UV prevnatvalue = NATIVE_TO_UNI(prevvalue);
const UV natvalue = NATIVE_TO_UNI(value);
stored+=2; /* can't optimize this class */
- ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
+ ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP;
if (prevnatvalue < natvalue) { /* what about > ? */
Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n",
prevnatvalue, natvalue);
@@ -9530,7 +9530,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o)
EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags);
/* output information about the unicode matching */
- if (flags & ANYOF_UNICODE)
+ if (flags & ANYOF_NONBITMAP)
sv_catpvs(sv, "{unicode}");
else if (flags & ANYOF_UNICODE_ALL)
sv_catpvs(sv, "{unicode_all}");
diff --git a/regcomp.h b/regcomp.h
index 7b73b02b42..4fb119ed01 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -326,7 +326,10 @@ struct regnode_charclass_class {
/* EOS used for regstclass only */
#define ANYOF_EOS 0x10 /* Can match an empty string too */
-#define ANYOF_UNICODE 0x20 /* Matches >= one thing past 0xff */
+/* Set if the bitmap doesn't fully represent what this node can match */
+#define ANYOF_NONBITMAP 0x20
+#define ANYOF_UNICODE ANYOF_NONBITMAP /* old name, for back compat */
+
#define ANYOF_UNICODE_ALL 0x40 /* Matches 0x100 - infinity */
#define ANYOF_FLAGS_ALL 0xff
diff --git a/regexec.c b/regexec.c
index e0be8b05f0..b0e5777824 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1359,7 +1359,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
switch (OP(c)) {
case ANYOF:
if (utf8_target) {
- REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) ||
+ REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_NONBITMAP) ||
!UTF8_IS_INVARIANT((U8)s[0]) ?
reginclass(prog, c, (U8*)s, 0, utf8_target) :
REGINCLASS(prog, c, (U8*)s));
@@ -6299,7 +6299,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n,
/* If the bitmap didn't (or couldn't) match, and something outside the
* bitmap could match, try that */
- if (!match && (utf8_target || (flags & ANYOF_UNICODE))) {
+ if (!match && (utf8_target || (flags & ANYOF_NONBITMAP))) {
if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) {
match = TRUE;
}
diff --git a/utf8.h b/utf8.h
index 8e6d4e0687..84ee9db822 100644
--- a/utf8.h
+++ b/utf8.h
@@ -280,7 +280,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF.
#define ANYOF_FOLD_SHARP_S(node, input, end) \
(ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \
- (ANYOF_FLAGS(node) & ANYOF_UNICODE) && \
+ (ANYOF_FLAGS(node) & ANYOF_NONBITMAP) && \
(ANYOF_FLAGS(node) & ANYOF_FOLD) && \
((end) > (input) + 1) && \
toLOWER((input)[0]) == 's' && \