diff options
-rw-r--r-- | regcomp.c | 22 | ||||
-rw-r--r-- | regcomp.h | 5 | ||||
-rw-r--r-- | regexec.c | 4 | ||||
-rw-r--r-- | utf8.h | 2 |
4 files changed, 18 insertions, 15 deletions
@@ -783,18 +783,18 @@ S_cl_and(struct regnode_charclass_class *cl, if (!(and_with->flags & ANYOF_FOLD)) cl->flags &= ~ANYOF_FOLD; - if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_UNICODE && + if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP && !(and_with->flags & ANYOF_INVERT)) { cl->flags &= ~ANYOF_UNICODE_ALL; - cl->flags |= ANYOF_UNICODE; + cl->flags |= ANYOF_NONBITMAP; ARG_SET(cl, ARG(and_with)); } if (!(and_with->flags & ANYOF_UNICODE_ALL) && !(and_with->flags & ANYOF_INVERT)) cl->flags &= ~ANYOF_UNICODE_ALL; - if (!(and_with->flags & (ANYOF_UNICODE|ANYOF_UNICODE_ALL)) && + if (!(and_with->flags & (ANYOF_NONBITMAP|ANYOF_UNICODE_ALL)) && !(and_with->flags & ANYOF_INVERT)) - cl->flags &= ~ANYOF_UNICODE; + cl->flags &= ~ANYOF_NONBITMAP; } /* 'OR' a given class with another one. Can create false positives */ @@ -851,14 +851,14 @@ S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, con if (or_with->flags & ANYOF_FOLD) cl->flags |= ANYOF_FOLD; - if (cl->flags & ANYOF_UNICODE && or_with->flags & ANYOF_UNICODE && + if (cl->flags & ANYOF_NONBITMAP && or_with->flags & ANYOF_NONBITMAP && ARG(cl) != ARG(or_with)) { cl->flags |= ANYOF_UNICODE_ALL; - cl->flags &= ~ANYOF_UNICODE; + cl->flags &= ~ANYOF_NONBITMAP; } if (or_with->flags & ANYOF_UNICODE_ALL) { cl->flags |= ANYOF_UNICODE_ALL; - cl->flags &= ~ANYOF_UNICODE; + cl->flags &= ~ANYOF_NONBITMAP; } } @@ -8317,7 +8317,7 @@ parseit: (value=='p' ? '+' : '!'), (int)n, RExC_parse); } RExC_parse = e + 1; - ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP; namedclass = ANYOF_MAX; /* no official name, but it's named */ } break; @@ -8441,7 +8441,7 @@ parseit: ANYOF_BITMAP_SET(ret, '-'); } else { - ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP; Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\n%04"UVxf"\n", (UV)prevvalue, (UV) '-'); } @@ -8631,7 +8631,7 @@ parseit: const UV prevnatvalue = NATIVE_TO_UNI(prevvalue); const UV natvalue = NATIVE_TO_UNI(value); stored+=2; /* can't optimize this class */ - ANYOF_FLAGS(ret) |= ANYOF_UNICODE; + ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP; if (prevnatvalue < natvalue) { /* what about > ? */ Perl_sv_catpvf(aTHX_ listsv, "%04"UVxf"\t%04"UVxf"\n", prevnatvalue, natvalue); @@ -9530,7 +9530,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) EMIT_ANYOF_TEST_SEPARATOR(do_sep,sv,flags); /* output information about the unicode matching */ - if (flags & ANYOF_UNICODE) + if (flags & ANYOF_NONBITMAP) sv_catpvs(sv, "{unicode}"); else if (flags & ANYOF_UNICODE_ALL) sv_catpvs(sv, "{unicode_all}"); @@ -326,7 +326,10 @@ struct regnode_charclass_class { /* EOS used for regstclass only */ #define ANYOF_EOS 0x10 /* Can match an empty string too */ -#define ANYOF_UNICODE 0x20 /* Matches >= one thing past 0xff */ +/* Set if the bitmap doesn't fully represent what this node can match */ +#define ANYOF_NONBITMAP 0x20 +#define ANYOF_UNICODE ANYOF_NONBITMAP /* old name, for back compat */ + #define ANYOF_UNICODE_ALL 0x40 /* Matches 0x100 - infinity */ #define ANYOF_FLAGS_ALL 0xff @@ -1359,7 +1359,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, switch (OP(c)) { case ANYOF: if (utf8_target) { - REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_UNICODE) || + REXEC_FBC_UTF8_CLASS_SCAN((ANYOF_FLAGS(c) & ANYOF_NONBITMAP) || !UTF8_IS_INVARIANT((U8)s[0]) ? reginclass(prog, c, (U8*)s, 0, utf8_target) : REGINCLASS(prog, c, (U8*)s)); @@ -6299,7 +6299,7 @@ S_reginclass(pTHX_ const regexp * const prog, register const regnode * const n, /* If the bitmap didn't (or couldn't) match, and something outside the * bitmap could match, try that */ - if (!match && (utf8_target || (flags & ANYOF_UNICODE))) { + if (!match && (utf8_target || (flags & ANYOF_NONBITMAP))) { if (utf8_target && (flags & ANYOF_UNICODE_ALL) && c >= 256) { match = TRUE; } @@ -280,7 +280,7 @@ Perl's extended UTF-8 means we can have start bytes up to FF. #define ANYOF_FOLD_SHARP_S(node, input, end) \ (ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \ - (ANYOF_FLAGS(node) & ANYOF_UNICODE) && \ + (ANYOF_FLAGS(node) & ANYOF_NONBITMAP) && \ (ANYOF_FLAGS(node) & ANYOF_FOLD) && \ ((end) > (input) + 1) && \ toLOWER((input)[0]) == 's' && \ |