diff options
-rw-r--r-- | dump.c | 4 | ||||
-rw-r--r-- | ext/re/re.xs | 10 | ||||
-rw-r--r-- | mg.c | 2 | ||||
-rw-r--r-- | op.c | 2 | ||||
-rw-r--r-- | op.h | 4 | ||||
-rw-r--r-- | pp.c | 12 | ||||
-rw-r--r-- | pp_hot.c | 26 | ||||
-rw-r--r-- | regcomp.c | 137 | ||||
-rw-r--r-- | regexec.c | 58 | ||||
-rw-r--r-- | regexp.h | 200 |
10 files changed, 256 insertions, 199 deletions
@@ -557,9 +557,9 @@ S_pm_description(pTHX_ const PMOP *pm) if (pmflags & PMf_ONCE) sv_catpv(desc, ",ONCE"); if (regex && regex->check_substr) { - if (!(regex->reganch & ROPT_NOSCAN)) + if (!(regex->extflags & RXf_NOSCAN)) sv_catpv(desc, ",SCANFIRST"); - if (regex->reganch & ROPT_CHECK_ALL) + if (regex->extflags & RXf_CHECK_ALL) sv_catpv(desc, ",ALL"); } if (pmflags & PMf_SKIPWHITE) diff --git a/ext/re/re.xs b/ext/re/re.xs index f12ce39811..c93a0b44b5 100644 --- a/ext/re/re.xs +++ b/ext/re/re.xs @@ -115,17 +115,17 @@ PPCODE: char *fptr = "msix"; char ch; - U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12); + U16 match_flags = (U16)((re->extflags & PMf_COMPILETIME) >> 12); while((ch = *fptr++)) { - if(reganch & 1) { + if(match_flags & 1) { reflags[left++] = ch; } - reganch >>= 1; + match_flags >>= 1; } pattern = sv_2mortal(newSVpvn(re->precomp,re->prelen)); - if (re->reganch & ROPT_UTF8) SvUTF8_on(pattern); + if (re->extflags & RXf_UTF8) SvUTF8_on(pattern); /* return the pattern and the modifiers */ XPUSHs(pattern); @@ -138,7 +138,7 @@ PPCODE: /* return the pattern in (?msix:..) format */ pattern = sv_2mortal(newSVpvn(mg->mg_ptr,mg->mg_len)); - if (re->reganch & ROPT_UTF8) + if (re->extflags & RXf_UTF8) SvUTF8_on(pattern); XPUSHs(pattern); XSRETURN(1); @@ -871,7 +871,7 @@ Perl_magic_get(pTHX_ SV *sv, MAGIC *mg) TAINT_NOT; sv_setpvn(sv, s, i); PL_tainted = oldtainted; - if ( (rx->reganch & ROPT_CANY_SEEN) + if ( (rx->extflags & RXf_CANY_SEEN) ? 
(RX_MATCH_UTF8(rx) && (!i || is_utf8_string((U8*)s, i))) : (RX_MATCH_UTF8(rx)) ) @@ -3327,7 +3327,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg) if (curop == repl && !(repl_has_vars && (!PM_GETRE(pm) - || PM_GETRE(pm)->reganch & ROPT_EVAL_SEEN))) { + || PM_GETRE(pm)->extflags & RXf_EVAL_SEEN))) { pm->op_pmflags |= PMf_CONST; /* const for long enough */ pm->op_pmpermflags |= PMf_CONST; /* const for long enough */ prepend_elem(o->op_type, scalar(repl), o); @@ -360,7 +360,9 @@ struct pmop { #define PMf_FOLD 0x4000 /* case insensitivity */ #define PMf_EXTENDED 0x8000 /* chuck embedded whitespace */ -/* mask of bits stored in regexp->reganch */ +/* mask of bits stored in regexp->extflags + these all are also called RXf_PMf_xyz + */ #define PMf_COMPILETIME (PMf_MULTILINE|PMf_SINGLELINE|PMf_LOCALE|PMf_FOLD|PMf_EXTENDED) #ifdef USE_ITHREADS @@ -4645,15 +4645,15 @@ PP(pp_split) s = m; } } - else if (do_utf8 == ((rx->reganch & ROPT_UTF8) != 0) && - (rx->reganch & RE_USE_INTUIT) && !rx->nparens - && (rx->reganch & ROPT_CHECK_ALL) - && !(rx->reganch & ROPT_ANCH)) { - const int tail = (rx->reganch & RE_INTUIT_TAIL); + else if (do_utf8 == ((rx->extflags & RXf_UTF8) != 0) && + (rx->extflags & RXf_USE_INTUIT) && !rx->nparens + && (rx->extflags & RXf_CHECK_ALL) + && !(rx->extflags & RXf_ANCH)) { + const int tail = (rx->extflags & RXf_INTUIT_TAIL); SV * const csv = CALLREG_INTUIT_STRING(rx); len = rx->minlenret; - if (len == 1 && !(rx->reganch & ROPT_UTF8) && !tail) { + if (len == 1 && !(rx->extflags & RXf_UTF8) && !tail) { const char c = *SvPV_nolen_const(csv); while (--limit) { for (m = s; m < strend && *m != c; m++) @@ -1351,12 +1351,12 @@ PP(pp_match) if (SvTYPE(TARG) >= SVt_PVMG && SvMAGIC(TARG)) { MAGIC* const mg = mg_find(TARG, PERL_MAGIC_regex_global); if (mg && mg->mg_len >= 0) { - if (!(rx->reganch & ROPT_GPOS_SEEN)) + if (!(rx->extflags & RXf_GPOS_SEEN)) rx->endp[0] = rx->startp[0] = mg->mg_len; - else if (rx->reganch & ROPT_ANCH_GPOS) { + else if (rx->extflags & 
RXf_ANCH_GPOS) { r_flags |= REXEC_IGNOREPOS; rx->endp[0] = rx->startp[0] = mg->mg_len; - } else if (rx->reganch & ROPT_GPOS_FLOAT) + } else if (rx->extflags & RXf_GPOS_FLOAT) gpos = mg->mg_len; else rx->endp[0] = rx->startp[0] = mg->mg_len; @@ -1381,18 +1381,18 @@ play_it_again: if (update_minmatch++) minmatch = had_zerolen; } - if (rx->reganch & RE_USE_INTUIT && - DO_UTF8(TARG) == ((rx->reganch & ROPT_UTF8) != 0)) { + if (rx->extflags & RXf_USE_INTUIT && + DO_UTF8(TARG) == ((rx->extflags & RXf_UTF8) != 0)) { /* FIXME - can PL_bostr be made const char *? */ PL_bostr = (char *)truebase; s = CALLREG_INTUIT_START(rx, TARG, (char *)s, (char *)strend, r_flags, NULL); if (!s) goto nope; - if ( (rx->reganch & ROPT_CHECK_ALL) + if ( (rx->extflags & RXf_CHECK_ALL) && !PL_sawampersand - && ((rx->reganch & ROPT_NOSCAN) - || !((rx->reganch & RE_INTUIT_TAIL) + && ((rx->extflags & RXf_NOSCAN) + || !((rx->extflags & RXf_INTUIT_TAIL) && (r_flags & REXEC_SCREAM))) && !SvROK(TARG)) /* Cannot trust since INTUIT cannot guess ^ */ goto yup; @@ -2155,17 +2155,17 @@ PP(pp_subst) r_flags |= REXEC_SCREAM; orig = m = s; - if (rx->reganch & RE_USE_INTUIT) { + if (rx->extflags & RXf_USE_INTUIT) { PL_bostr = orig; s = CALLREG_INTUIT_START(rx, TARG, s, strend, r_flags, NULL); if (!s) goto nope; /* How to do it in subst? 
*/ -/* if ( (rx->reganch & ROPT_CHECK_ALL) +/* if ( (rx->extflags & RXf_CHECK_ALL) && !PL_sawampersand - && ((rx->reganch & ROPT_NOSCAN) - || !((rx->reganch & RE_INTUIT_TAIL) + && ((rx->extflags & RXf_NOSCAN) + || !((rx->extflags & RXf_INTUIT_TAIL) && (r_flags & REXEC_SCREAM)))) goto yup; */ @@ -2203,7 +2203,7 @@ PP(pp_subst) && !is_cow #endif && (I32)clen <= rx->minlenret && (once || !(r_flags & REXEC_COPY_STR)) - && !(rx->reganch & ROPT_LOOKBEHIND_SEEN) + && !(rx->extflags & RXf_LOOKBEHIND_SEEN) && (!doutf8 || SvUTF8(TARG))) { if (!CALLREGEXEC(rx, s, strend, orig, 0, TARG, NULL, r_flags | REXEC_CHECKED)) @@ -342,8 +342,8 @@ static const scan_data_t zero_scan_data = #define SCF_SEEN_ACCEPT 0x8000 #define UTF (RExC_utf8 != 0) -#define LOC ((RExC_flags & PMf_LOCALE) != 0) -#define FOLD ((RExC_flags & PMf_FOLD) != 0) +#define LOC ((RExC_flags & RXf_PMf_LOCALE) != 0) +#define FOLD ((RExC_flags & RXf_PMf_FOLD) != 0) #define OOB_UNICODE 12345678 #define OOB_NAMEDCLASS -1 @@ -3667,15 +3667,15 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, flags &= ~SCF_DO_STCLASS; } else if (OP(scan) == GPOS) { - if (!(RExC_rx->reganch & ROPT_GPOS_FLOAT) && + if (!(RExC_rx->extflags & RXf_GPOS_FLOAT) && !(delta || is_inf || (data && data->pos_delta))) { - if (!(RExC_rx->reganch & ROPT_ANCH) && (flags & SCF_DO_SUBSTR)) - RExC_rx->reganch |= ROPT_ANCH_GPOS; + if (!(RExC_rx->extflags & RXf_ANCH) && (flags & SCF_DO_SUBSTR)) + RExC_rx->extflags |= RXf_ANCH_GPOS; if (RExC_rx->gofs < (U32)min) RExC_rx->gofs = min; } else { - RExC_rx->reganch |= ROPT_GPOS_FLOAT; + RExC_rx->extflags |= RXf_GPOS_FLOAT; RExC_rx->gofs = 0; } } @@ -4076,7 +4076,8 @@ Perl_pregcomp(pTHX_ char *exp, char *xend, PMOP *pm) r->refcnt = 1; r->prelen = xend - exp; r->precomp = savepvn(RExC_precomp, r->prelen); - r->reganch = pm->op_pmflags & PMf_COMPILETIME; + r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; + r->intflags = 0; r->nparens = RExC_npar - 1; /* set early to validate backrefs */ if 
(RExC_seen & REG_SEEN_RECURSE) { @@ -4156,13 +4157,13 @@ reStudy: #endif /* Dig out information for optimizations. */ - r->reganch = pm->op_pmflags & PMf_COMPILETIME; /* Again? */ + r->extflags = pm->op_pmflags & RXf_PMf_COMPILETIME; /* Again? */ pm->op_pmflags = RExC_flags; if (UTF) - r->reganch |= ROPT_UTF8; /* Unicode in it? */ + r->extflags |= RXf_UTF8; /* Unicode in it? */ r->regstclass = NULL; if (RExC_naughty >= 10) /* Probably an expensive pattern. */ - r->reganch |= ROPT_NAUGHTY; + r->intflags |= PREGf_NAUGHTY; scan = r->program + 1; /* First BRANCH. */ /* testing for BRANCH here tells us whether there is "must appear" @@ -4236,37 +4237,38 @@ reStudy: PL_regkind[OP(first)] == NBOUND) r->regstclass = first; else if (PL_regkind[OP(first)] == BOL) { - r->reganch |= (OP(first) == MBOL - ? ROPT_ANCH_MBOL + r->extflags |= (OP(first) == MBOL + ? RXf_ANCH_MBOL : (OP(first) == SBOL - ? ROPT_ANCH_SBOL - : ROPT_ANCH_BOL)); + ? RXf_ANCH_SBOL + : RXf_ANCH_BOL)); first = NEXTOPER(first); goto again; } else if (OP(first) == GPOS) { - r->reganch |= ROPT_ANCH_GPOS; + r->extflags |= RXf_ANCH_GPOS; first = NEXTOPER(first); goto again; } else if ((!sawopen || !RExC_sawback) && (OP(first) == STAR && PL_regkind[OP(NEXTOPER(first))] == REG_ANY) && - !(r->reganch & ROPT_ANCH) && !(RExC_seen & REG_SEEN_EVAL)) + !(r->extflags & RXf_ANCH) && !(RExC_seen & REG_SEEN_EVAL)) { /* turn .* into ^.* with an implied $*=1 */ const int type = (OP(NEXTOPER(first)) == REG_ANY) - ? ROPT_ANCH_MBOL - : ROPT_ANCH_SBOL; - r->reganch |= type | ROPT_IMPLICIT; + ? RXf_ANCH_MBOL + : RXf_ANCH_SBOL; + r->extflags |= type; + r->intflags |= PREGf_IMPLICIT; first = NEXTOPER(first); goto again; } if (sawplus && (!sawopen || !RExC_sawback) && !(RExC_seen & REG_SEEN_EVAL)) /* May examine pos and $& */ /* x+ must match at the 1st pos of run of x's */ - r->reganch |= ROPT_SKIP; + r->intflags |= PREGf_SKIP; /* Scan is after the zeroth branch, first is atomic matcher. 
*/ #ifdef TRIE_STUDY_OPT @@ -4319,8 +4321,8 @@ reStudy: if ( RExC_npar == 1 && data.longest == &(data.longest_fixed) && data.last_start_min == 0 && data.last_end > 0 && !RExC_seen_zerolen - && (!(RExC_seen & REG_SEEN_GPOS) || (r->reganch & ROPT_ANCH_GPOS))) - r->reganch |= ROPT_CHECK_ALL; + && (!(RExC_seen & REG_SEEN_GPOS) || (r->extflags & RXf_ANCH_GPOS))) + r->extflags |= RXf_CHECK_ALL; scan_commit(pRExC_state, &data,&minlen); SvREFCNT_dec(data.last_found); @@ -4332,7 +4334,7 @@ reStudy: if (longest_float_length || (data.flags & SF_FL_BEFORE_EOL && (!(data.flags & SF_FL_BEFORE_MEOL) - || (RExC_flags & PMf_MULTILINE)))) + || (RExC_flags & RXf_PMf_MULTILINE)))) { I32 t,ml; @@ -4366,7 +4368,7 @@ reStudy: t = (data.flags & SF_FL_BEFORE_EOL /* Can't have SEOL and MULTI */ && (!(data.flags & SF_FL_BEFORE_MEOL) - || (RExC_flags & PMf_MULTILINE))); + || (RExC_flags & RXf_PMf_MULTILINE))); fbm_compile(data.longest_float, t ? FBMcf_TAIL : 0); } else { @@ -4384,7 +4386,7 @@ reStudy: if (longest_fixed_length || (data.flags & SF_FIX_BEFORE_EOL /* Cannot have SEOL and MULTI */ && (!(data.flags & SF_FIX_BEFORE_MEOL) - || (RExC_flags & PMf_MULTILINE)))) + || (RExC_flags & RXf_PMf_MULTILINE)))) { I32 t,ml; @@ -4410,7 +4412,7 @@ reStudy: t = (data.flags & SF_FIX_BEFORE_EOL /* Can't have SEOL and MULTI */ && (!(data.flags & SF_FIX_BEFORE_MEOL) - || (RExC_flags & PMf_MULTILINE))); + || (RExC_flags & RXf_PMf_MULTILINE))); fbm_compile(data.longest_fixed, t ? FBMcf_TAIL : 0); } else { @@ -4434,7 +4436,7 @@ reStudy: (struct regnode_charclass_class*)RExC_rx->data->data[n], struct regnode_charclass_class); r->regstclass = (regnode*)RExC_rx->data->data[n]; - r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ + r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). 
*/ DEBUG_COMPILE_r({ SV *sv = sv_newmortal(); regprop(r, sv, (regnode*)data.start_class); PerlIO_printf(Perl_debug_log, @@ -4448,8 +4450,8 @@ reStudy: r->check_substr = r->anchored_substr; r->check_utf8 = r->anchored_utf8; r->check_offset_min = r->check_offset_max = r->anchored_offset; - if (r->reganch & ROPT_ANCH_SINGLE) - r->reganch |= ROPT_NOSCAN; + if (r->extflags & RXf_ANCH_SINGLE) + r->extflags |= RXf_NOSCAN; } else { r->check_end_shift = r->float_end_shift; @@ -4460,10 +4462,10 @@ reStudy: } /* XXXX Currently intuiting is not compatible with ANCH_GPOS. This should be changed ASAP! */ - if ((r->check_substr || r->check_utf8) && !(r->reganch & ROPT_ANCH_GPOS)) { - r->reganch |= RE_USE_INTUIT; + if ((r->check_substr || r->check_utf8) && !(r->extflags & RXf_ANCH_GPOS)) { + r->extflags |= RXf_USE_INTUIT; if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8)) - r->reganch |= RE_INTUIT_TAIL; + r->extflags |= RXf_INTUIT_TAIL; } /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere) if ( (STRLEN)minlen < longest_float_length ) @@ -4504,7 +4506,7 @@ reStudy: (struct regnode_charclass_class*)RExC_rx->data->data[n], struct regnode_charclass_class); r->regstclass = (regnode*)RExC_rx->data->data[n]; - r->reganch &= ~ROPT_SKIP; /* Used in find_byclass(). */ + r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). 
*/ DEBUG_COMPILE_r({ SV* sv = sv_newmortal(); regprop(r, sv, (regnode*)data.start_class); PerlIO_printf(Perl_debug_log, @@ -4524,17 +4526,17 @@ reStudy: r->minlen = minlen; if (RExC_seen & REG_SEEN_GPOS) - r->reganch |= ROPT_GPOS_SEEN; + r->extflags |= RXf_GPOS_SEEN; if (RExC_seen & REG_SEEN_LOOKBEHIND) - r->reganch |= ROPT_LOOKBEHIND_SEEN; + r->extflags |= RXf_LOOKBEHIND_SEEN; if (RExC_seen & REG_SEEN_EVAL) - r->reganch |= ROPT_EVAL_SEEN; + r->extflags |= RXf_EVAL_SEEN; if (RExC_seen & REG_SEEN_CANY) - r->reganch |= ROPT_CANY_SEEN; + r->extflags |= RXf_CANY_SEEN; if (RExC_seen & REG_SEEN_VERBARG) - r->reganch |= ROPT_VERBARG_SEEN; + r->intflags |= PREGf_VERBARG_SEEN; if (RExC_seen & REG_SEEN_CUTGROUP) - r->reganch |= ROPT_CUTGROUP_SEEN; + r->intflags |= PREGf_CUTGROUP_SEEN; if (RExC_paren_names) r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names); else @@ -6070,9 +6072,9 @@ tryagain: case '^': RExC_seen_zerolen++; nextchar(pRExC_state); - if (RExC_flags & PMf_MULTILINE) + if (RExC_flags & RXf_PMf_MULTILINE) ret = reg_node(pRExC_state, MBOL); - else if (RExC_flags & PMf_SINGLELINE) + else if (RExC_flags & RXf_PMf_SINGLELINE) ret = reg_node(pRExC_state, SBOL); else ret = reg_node(pRExC_state, BOL); @@ -6082,9 +6084,9 @@ tryagain: nextchar(pRExC_state); if (*RExC_parse) RExC_seen_zerolen++; - if (RExC_flags & PMf_MULTILINE) + if (RExC_flags & RXf_PMf_MULTILINE) ret = reg_node(pRExC_state, MEOL); - else if (RExC_flags & PMf_SINGLELINE) + else if (RExC_flags & RXf_PMf_SINGLELINE) ret = reg_node(pRExC_state, SEOL); else ret = reg_node(pRExC_state, EOL); @@ -6092,7 +6094,7 @@ tryagain: break; case '.': nextchar(pRExC_state); - if (RExC_flags & PMf_SINGLELINE) + if (RExC_flags & RXf_PMf_SINGLELINE) ret = reg_node(pRExC_state, SANY); else ret = reg_node(pRExC_state, REG_ANY); @@ -6396,7 +6398,7 @@ tryagain: break; case '#': - if (RExC_flags & PMf_EXTENDED) { + if (RExC_flags & RXf_PMf_EXTENDED) { while (RExC_parse < RExC_end && *RExC_parse != '\n') RExC_parse++; if 
(RExC_parse < RExC_end) @@ -6427,7 +6429,7 @@ tryagain: { char * const oldp = p; - if (RExC_flags & PMf_EXTENDED) + if (RExC_flags & RXf_PMf_EXTENDED) p = regwhite(p, RExC_end); switch (*p) { case '^': @@ -6563,7 +6565,7 @@ tryagain: ender = *p++; break; } - if (RExC_flags & PMf_EXTENDED) + if (RExC_flags & RXf_PMf_EXTENDED) p = regwhite(p, RExC_end); if (UTF && FOLD) { /* Prime the casefolded buffer. */ @@ -7719,7 +7721,7 @@ S_nextchar(pTHX_ RExC_state_t *pRExC_state) RExC_parse++; continue; } - if (RExC_flags & PMf_EXTENDED) { + if (RExC_flags & RXf_PMf_EXTENDED) { if (isSPACE(*RExC_parse)) { RExC_parse++; continue; @@ -8128,9 +8130,9 @@ Perl_regdump(pTHX_ const regexp *r) (r->check_substr == r->float_substr && r->check_utf8 == r->float_utf8 ? "(checking floating" : "(checking anchored")); - if (r->reganch & ROPT_NOSCAN) + if (r->extflags & RXf_NOSCAN) PerlIO_printf(Perl_debug_log, " noscan"); - if (r->reganch & ROPT_CHECK_ALL) + if (r->extflags & RXf_CHECK_ALL) PerlIO_printf(Perl_debug_log, " isall"); if (r->check_substr || r->check_utf8) PerlIO_printf(Perl_debug_log, ") "); @@ -8139,26 +8141,26 @@ Perl_regdump(pTHX_ const regexp *r) regprop(r, sv, r->regstclass); PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv)); } - if (r->reganch & ROPT_ANCH) { + if (r->extflags & RXf_ANCH) { PerlIO_printf(Perl_debug_log, "anchored"); - if (r->reganch & ROPT_ANCH_BOL) + if (r->extflags & RXf_ANCH_BOL) PerlIO_printf(Perl_debug_log, "(BOL)"); - if (r->reganch & ROPT_ANCH_MBOL) + if (r->extflags & RXf_ANCH_MBOL) PerlIO_printf(Perl_debug_log, "(MBOL)"); - if (r->reganch & ROPT_ANCH_SBOL) + if (r->extflags & RXf_ANCH_SBOL) PerlIO_printf(Perl_debug_log, "(SBOL)"); - if (r->reganch & ROPT_ANCH_GPOS) + if (r->extflags & RXf_ANCH_GPOS) PerlIO_printf(Perl_debug_log, "(GPOS)"); PerlIO_putc(Perl_debug_log, ' '); } - if (r->reganch & ROPT_GPOS_SEEN) + if (r->extflags & RXf_GPOS_SEEN) PerlIO_printf(Perl_debug_log, "GPOS:%"UVuf" ", r->gofs); - if (r->reganch & ROPT_SKIP) + if 
(r->intflags & PREGf_SKIP) PerlIO_printf(Perl_debug_log, "plus "); - if (r->reganch & ROPT_IMPLICIT) + if (r->intflags & PREGf_IMPLICIT) PerlIO_printf(Perl_debug_log, "implicit "); PerlIO_printf(Perl_debug_log, "minlen %ld ", (long) r->minlen); - if (r->reganch & ROPT_EVAL_SEEN) + if (r->extflags & RXf_EVAL_SEEN) PerlIO_printf(Perl_debug_log, "with eval "); PerlIO_printf(Perl_debug_log, "\n"); #else @@ -8463,7 +8465,7 @@ Perl_pregfree(pTHX_ struct regexp *r) reginitcolors(); { SV *dsv= sv_newmortal(); - RE_PV_QUOTED_DECL(s, (r->reganch & ROPT_UTF8), + RE_PV_QUOTED_DECL(s, (r->extflags & RXf_UTF8), dsv, r->precomp, r->prelen, 60); PerlIO_printf(Perl_debug_log,"%sFreeing REx:%s %s\n", PL_colors[4],PL_colors[5],s); @@ -8668,7 +8670,7 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) for (i = 0; i < count; i++) { d->what[i] = r->data->what[i]; switch (d->what[i]) { - /* legal options are one of: sSfpont + /* legal options are one of: sSfpontT see also regcomp.h and pregfree() */ case 's': case 'S': @@ -8685,8 +8687,8 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) ret->regstclass = (regnode*)d->data[i]; break; case 'o': - /* Compiled op trees are readonly, and can thus be - shared without duplication. */ + /* Compiled op trees are readonly and in shared memory, + and can thus be shared without duplication. 
*/ OP_REFCNT_LOCK; d->data[i] = (void*)OpREFCNT_inc((OP*)r->data->data[i]); OP_REFCNT_UNLOCK; @@ -8732,7 +8734,8 @@ Perl_regdupe(pTHX_ const regexp *r, CLONE_PARAMS *param) ret->nparens = r->nparens; ret->lastparen = r->lastparen; ret->lastcloseparen = r->lastcloseparen; - ret->reganch = r->reganch; + ret->intflags = r->intflags; + ret->extflags = r->extflags; ret->sublen = r->sublen; @@ -8793,7 +8796,7 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { int left = 0; int right = 4; bool need_newline = 0; - U16 reganch = (U16)((re->reganch & PMf_COMPILETIME) >> 12); + U16 reganch = (U16)((re->extflags & RXf_PMf_COMPILETIME) >> 12); while((ch = *fptr++)) { if(reganch & 1) { @@ -8821,7 +8824,7 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { * ourself. If we find a '\n' first (or if we don't find '#' or '\n'), * we don't need to add anything. -jfriedl */ - if (PMf_EXTENDED & re->reganch) { + if (PMf_EXTENDED & re->extflags) { const char *endptr = re->precomp + re->prelen; while (endptr >= re->precomp) { const char c = *(endptr--); @@ -8850,7 +8853,7 @@ Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval ) { if (haseval) *haseval = re->program[0].next_off; if (flags) - *flags = ((re->reganch & ROPT_UTF8) ? 1 : 0); + *flags = ((re->extflags & RXf_UTF8) ? 
1 : 0); if (lp) *lp = mg->mg_len; @@ -371,7 +371,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, I32 ml_anch; register char *other_last = NULL; /* other substr checked before this */ char *check_at = NULL; /* check substr found at this pos */ - const I32 multiline = prog->reganch & PMf_MULTILINE; + const I32 multiline = prog->extflags & RXf_PMf_MULTILINE; #ifdef DEBUGGING const char * const i_strpos = strpos; #endif @@ -380,7 +380,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, RX_MATCH_UTF8_set(prog,do_utf8); - if (prog->reganch & ROPT_UTF8) { + if (prog->extflags & RXf_UTF8) { PL_reg_flags |= RF_utf8; } DEBUG_EXECUTE_r( @@ -412,14 +412,14 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, "Non-utf8 string cannot match utf8 check string\n")); goto fail; } - if (prog->reganch & ROPT_ANCH) { /* Match at beg-of-str or after \n */ - ml_anch = !( (prog->reganch & ROPT_ANCH_SINGLE) - || ( (prog->reganch & ROPT_ANCH_BOL) + if (prog->extflags & RXf_ANCH) { /* Match at beg-of-str or after \n */ + ml_anch = !( (prog->extflags & RXf_ANCH_SINGLE) + || ( (prog->extflags & RXf_ANCH_BOL) && !multiline ) ); /* Check after \n? */ if (!ml_anch) { - if ( !(prog->reganch & (ROPT_ANCH_GPOS /* Checked by the caller */ - | ROPT_IMPLICIT)) /* not a real BOL */ + if ( !(prog->extflags & RXf_ANCH_GPOS) /* Checked by the caller */ + && !(prog->intflags & PREGf_IMPLICIT) /* not a real BOL */ /* SvCUR is not set on references: SvRV and SvPVX_const overlap */ && sv && !SvROK(sv) && (strpos != strbeg)) { @@ -427,7 +427,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, goto fail; } if (prog->check_offset_min == prog->check_offset_max && - !(prog->reganch & ROPT_CANY_SEEN)) { + !(prog->extflags & RXf_CANY_SEEN)) { /* Substring at constant offset from beg-of-str... 
*/ I32 slen; @@ -528,7 +528,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, else { U8* start_point; U8* end_point; - if (prog->reganch & ROPT_CANY_SEEN) { + if (prog->extflags & RXf_CANY_SEEN) { start_point= (U8*)(s + srch_start_shift); end_point= (U8*)(strend - srch_end_shift); } else { @@ -814,7 +814,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, if (ml_anch && sv && !SvROK(sv) /* See prev comment on SvROK */ && (strpos != strbeg) && strpos[-1] != '\n' /* May be due to an implicit anchor of m{.*foo} */ - && !(prog->reganch & ROPT_IMPLICIT)) + && !(prog->intflags & PREGf_IMPLICIT)) { t = strpos; goto find_anchor; @@ -824,7 +824,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]); ); success_at_start: - if (!(prog->reganch & ROPT_NAUGHTY) /* XXXX If strpos moved? */ + if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */ && (do_utf8 ? ( prog->check_utf8 /* Could be deleted already */ && --BmUSEFUL(prog->check_utf8) < 0 @@ -847,7 +847,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, /* XXXX This is a remnant of the old implementation. It looks wasteful, since now INTUIT can use many other heuristics. 
*/ - prog->reganch &= ~RE_USE_INTUIT; + prog->extflags &= ~RXf_USE_INTUIT; } else s = strpos; @@ -894,7 +894,7 @@ Perl_re_intuit_start(pTHX_ regexp *prog, SV *sv, char *strpos, } DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "This position contradicts STCLASS...\n") ); - if ((prog->reganch & ROPT_ANCH) && !ml_anch) + if ((prog->extflags & RXf_ANCH) && !ml_anch) goto fail; /* Contradict one of substrings */ if (prog->anchored_substr || prog->anchored_utf8) { @@ -1126,7 +1126,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, regmatch_info *reginfo) { dVAR; - const I32 doevery = (prog->reganch & ROPT_SKIP) == 0; + const I32 doevery = (prog->intflags & PREGf_SKIP) == 0; char *m; STRLEN ln; STRLEN lnc; @@ -1665,7 +1665,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * return 0; } - multiline = prog->reganch & PMf_MULTILINE; + multiline = prog->extflags & RXf_PMf_MULTILINE; reginfo.prog = prog; RX_MATCH_UTF8_set(prog, do_utf8); @@ -1692,7 +1692,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * PL_reg_eval_set = 0; PL_reg_maxiter = 0; - if (prog->reganch & ROPT_UTF8) + if (prog->extflags & RXf_UTF8) PL_reg_flags |= RF_utf8; /* Mark beginning of line for ^ and lookbehind. */ @@ -1709,7 +1709,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* If there is a "must appear" string, look for it. 
*/ s = startpos; - if (prog->reganch & ROPT_GPOS_SEEN) { /* Need to set reginfo->ganch */ + if (prog->extflags & RXf_GPOS_SEEN) { /* Need to set reginfo->ganch */ MAGIC *mg; if (flags & REXEC_IGNOREPOS) /* Means: check only at start */ @@ -1719,7 +1719,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * && (mg = mg_find(sv, PERL_MAGIC_regex_global)) && mg->mg_len >= 0) { reginfo.ganch = strbeg + mg->mg_len; /* Defined pos() */ - if (prog->reganch & ROPT_ANCH_GPOS) { + if (prog->extflags & RXf_ANCH_GPOS) { if (s > reginfo.ganch) goto phooey; s = reginfo.ganch - prog->gofs; @@ -1768,11 +1768,11 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * /* Simplest case: anchored match need be tried only once. */ /* [unless only anchor is BOL and multiline is set] */ - if (prog->reganch & (ROPT_ANCH & ~ROPT_ANCH_GPOS)) { + if (prog->extflags & (RXf_ANCH & ~RXf_ANCH_GPOS)) { if (s == startpos && regtry(&reginfo, &startpos)) goto got_it; - else if (multiline || (prog->reganch & ROPT_IMPLICIT) - || (prog->reganch & ROPT_ANCH_MBOL)) /* XXXX SBOL? 
*/ { char *end; @@ -1789,7 +1789,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * after_try: if (s >= end) goto phooey; - if (prog->reganch & RE_USE_INTUIT) { + if (prog->extflags & RXf_USE_INTUIT) { s = re_intuit_start(prog, sv, s + 1, strend, flags, NULL); if (!s) goto phooey; @@ -1809,10 +1809,10 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } } goto phooey; - } else if (ROPT_GPOS_CHECK == (prog->reganch & ROPT_GPOS_CHECK)) + } else if (RXf_GPOS_CHECK == (prog->extflags & RXf_GPOS_CHECK)) { /* the warning about reginfo.ganch being used without intialization - is bogus -- we set it above, when prog->reganch & ROPT_GPOS_SEEN + is bogus -- we set it above, when prog->extflags & RXf_GPOS_SEEN and we only enter this block when the same bit is set. */ char *tmp_s = reginfo.ganch - prog->gofs; if (regtry(&reginfo, &tmp_s)) @@ -1821,7 +1821,7 @@ Perl_regexec_flags(pTHX_ register regexp *prog, char *stringarg, register char * } /* Messy cases: unanchored match. */ - if ((prog->anchored_substr || prog->anchored_utf8) && prog->reganch & ROPT_SKIP) { + if ((prog->anchored_substr || prog->anchored_utf8) && prog->intflags & PREGf_SKIP) { /* we have /x+whatever/ */ /* it must be a one character string (XXXX Except UTF?) */ char ch; @@ -2128,7 +2128,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) GET_RE_DEBUG_FLAGS_DECL; reginfo->cutpoint=NULL; - if ((prog->reganch & ROPT_EVAL_SEEN) && !PL_reg_eval_set) { + if ((prog->extflags & RXf_EVAL_SEEN) && !PL_reg_eval_set) { MAGIC *mg; PL_reg_eval_set = RS_init; @@ -2473,7 +2473,7 @@ STATIC void S_debug_start_match(pTHX_ const regexp *prog, const bool do_utf8, const char *start, const char *end, const char *blurb) { - const bool utf8_pat= prog->reganch & ROPT_UTF8 ? 1 : 0; + const bool utf8_pat= prog->extflags & RXf_UTF8 ? 
1 : 0; if (!PL_colorset) reginitcolors(); { @@ -3637,7 +3637,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) PL_reg_maxiter = 0; ST.toggle_reg_flags = PL_reg_flags; - if (re->reganch & ROPT_UTF8) + if (re->extflags & RXf_UTF8) PL_reg_flags |= RF_utf8; else PL_reg_flags &= ~RF_utf8; @@ -5028,7 +5028,7 @@ no_silent: result = 0; final_exit: - if (rex->reganch & ROPT_VERBARG_SEEN) { + if (rex->intflags & PREGf_VERBARG_SEEN) { SV *sv_err = get_sv("REGERROR", 1); SV *sv_mrk = get_sv("REGMARK", 1); if (result) { @@ -31,38 +31,67 @@ struct reg_substr_data; struct reg_data; struct regexp_engine; + typedef struct regexp_paren_ofs { I32 *startp; I32 *endp; } regexp_paren_ofs; -typedef struct regexp { - I32 *startp; - I32 *endp; - regexp_paren_ofs *swap; - regnode *regstclass; - struct reg_substr_data *substrs; - char *precomp; /* pre-compilation regular expression */ - struct reg_data *data; /* Additional data. */ - char *subbeg; /* saved or original string - so \digit works forever. */ #ifdef PERL_OLD_COPY_ON_WRITE - SV *saved_copy; /* If non-NULL, SV which is COW from original */ +#define SV_SAVED_COPY SV *saved_copy; /* If non-NULL, SV which is COW from original */ +#else +#define SV_SAVED_COPY #endif - U32 *offsets; /* offset annotations 20001228 MJD */ - I32 sublen; /* Length of string pointed by subbeg */ - I32 refcnt; + +typedef struct regexp { + /* Generic details */ + const struct regexp_engine* engine; /* what created this regexp? */ + I32 refcnt; /* Refcount of this regexp */ + + /* The original string as passed to the compilation routine */ + char *precomp; /* pre-compilation regular expression */ + I32 prelen; /* length of precomp */ + + /* Used for generic optimisations by the perl core. + All engines are expected to provide this information. 
*/ + U32 extflags; /* Flags used both externally and internally */ I32 minlen; /* mininum possible length of string to match */ I32 minlenret; /* mininum possible length of $& */ U32 gofs; /* chars left of pos that we search from */ - I32 prelen; /* length of precomp */ - U32 nparens; /* number of parentheses */ - U32 lastparen; /* last paren matched */ - U32 lastcloseparen; /* last paren matched */ - U32 reganch; /* Internal use only + - Tainted information used by regexec? */ - HV *paren_names; /* Paren names */ - const struct regexp_engine* engine; + U32 nparens; /* number of capture buffers */ + HV *paren_names; /* Optional hash of paren names */ + struct reg_substr_data *substrs; /* substring data about strings that must appear + in the final match, used for optimisations */ + + /* Data about the last/current match. Used by the core and therefore + must be populated by all engines. */ + char *subbeg; /* saved or original string + so \digit works forever. */ + I32 sublen; /* Length of string pointed by subbeg */ + I32 *startp; /* Array of offsets from start of string (@-) */ + I32 *endp; /* Array of offsets from start of string (@+) */ + + SV_SAVED_COPY /* If non-NULL, SV which is COW from original */ + U32 lastparen; /* last open paren matched */ + U32 lastcloseparen; /* last close paren matched */ + + /* Perl Regex Engine specific data. Other engines shouldn't need + to touch this. Should be refactored out into a different structure + and accessed via the *pprivate field. (except intflags) */ + U32 intflags; /* Internal flags */ + void *pprivate; /* Data private to the regex engine which + created this object. Perl will never mess with + this member at all. 
*/ + regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ + U32 *offsets; /* offset annotations 20001228 MJD + data about mapping the program to the + string*/ + regnode *regstclass; /* Optional startclass as identified or constructed + by the optimiser */ + struct reg_data *data; /* Additional miscellaneous data used by the program. + Used to make it easier to clone and free arbitrary + data that the regops need. Often the ARG field of + a regop is an index into this structure */ regnode program[1]; /* Unwarranted chumminess with compiler. */ } regexp; @@ -89,59 +118,82 @@ typedef struct regexp_engine { #endif } regexp_engine; -#define ROPT_ANCH (ROPT_ANCH_BOL|ROPT_ANCH_MBOL|ROPT_ANCH_GPOS|ROPT_ANCH_SBOL) -#define ROPT_ANCH_SINGLE (ROPT_ANCH_SBOL|ROPT_ANCH_GPOS) -#define ROPT_ANCH_BOL 0x00000001 -#define ROPT_ANCH_MBOL 0x00000002 -#define ROPT_ANCH_SBOL 0x00000004 -#define ROPT_ANCH_GPOS 0x00000008 -#define ROPT_SKIP 0x00000010 -#define ROPT_IMPLICIT 0x00000020 /* Converted .* to ^.* */ -#define ROPT_NOSCAN 0x00000040 /* Check-string always at start. */ -#define ROPT_GPOS_SEEN 0x00000080 -#define ROPT_CHECK_ALL 0x00000100 -#define ROPT_LOOKBEHIND_SEEN 0x00000200 -#define ROPT_EVAL_SEEN 0x00000400 -#define ROPT_CANY_SEEN 0x00000800 -#define ROPT_SANY_SEEN ROPT_CANY_SEEN /* src bckwrd cmpt */ -#define ROPT_GPOS_CHECK (ROPT_GPOS_SEEN|ROPT_ANCH_GPOS) - -/* 0xF800 of reganch is used by PMf_COMPILETIME */ - -#define ROPT_UTF8 0x00010000 -#define ROPT_NAUGHTY 0x00020000 /* how exponential is this pattern? 
*/ -#define ROPT_COPY_DONE 0x00040000 /* subbeg is a copy of the string */ -#define ROPT_TAINTED_SEEN 0x00080000 -#define ROPT_MATCH_UTF8 0x10000000 /* subbeg is utf-8 */ -#define ROPT_VERBARG_SEEN 0x20000000 -#define ROPT_CUTGROUP_SEEN 0x40000000 -#define ROPT_GPOS_FLOAT 0x80000000 - -#define RE_USE_INTUIT_NOML 0x00100000 /* Best to intuit before matching */ -#define RE_USE_INTUIT_ML 0x00200000 -#define REINT_AUTORITATIVE_NOML 0x00400000 /* Can trust a positive answer */ -#define REINT_AUTORITATIVE_ML 0x00800000 -#define REINT_ONCE_NOML 0x01000000 /* Intuit can succed once only. */ -#define REINT_ONCE_ML 0x02000000 -#define RE_INTUIT_ONECHAR 0x04000000 -#define RE_INTUIT_TAIL 0x08000000 - - -#define RE_USE_INTUIT (RE_USE_INTUIT_NOML|RE_USE_INTUIT_ML) -#define REINT_AUTORITATIVE (REINT_AUTORITATIVE_NOML|REINT_AUTORITATIVE_ML) -#define REINT_ONCE (REINT_ONCE_NOML|REINT_ONCE_ML) - -#define RX_HAS_CUTGROUP(prog) ((prog)->reganch & ROPT_CUTGROUP_SEEN) -#define RX_MATCH_TAINTED(prog) ((prog)->reganch & ROPT_TAINTED_SEEN) -#define RX_MATCH_TAINTED_on(prog) ((prog)->reganch |= ROPT_TAINTED_SEEN) -#define RX_MATCH_TAINTED_off(prog) ((prog)->reganch &= ~ROPT_TAINTED_SEEN) +/* + * Flags stored in regexp->intflags + * These are used only internally to the regexp engine + */ +#define PREGf_SKIP 0x00000001 +#define PREGf_IMPLICIT 0x00000002 /* Converted .* to ^.* */ +#define PREGf_NAUGHTY 0x00000004 /* how exponential is this pattern? 
*/ +#define PREGf_VERBARG_SEEN 0x00000008 +#define PREGf_CUTGROUP_SEEN 0x00000010 + + +/* Flags stored in regexp->extflags + * These are used by code external to the regexp engine + */ + +/* Anchor and GPOS related stuff */ +#define RXf_ANCH_BOL 0x00000001 +#define RXf_ANCH_MBOL 0x00000002 +#define RXf_ANCH_SBOL 0x00000004 +#define RXf_ANCH_GPOS 0x00000008 +#define RXf_GPOS_SEEN 0x00000010 +#define RXf_GPOS_FLOAT 0x00000020 +/* five bits here */ +#define RXf_ANCH (RXf_ANCH_BOL|RXf_ANCH_MBOL|RXf_ANCH_GPOS|RXf_ANCH_SBOL) +#define RXf_GPOS_CHECK (RXf_GPOS_SEEN|RXf_ANCH_GPOS) +#define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS) +/* + * 0xF800 of extflags is used by PMf_COMPILETIME + * These are the regex equivalent of the PMf_xyz stuff defined + * in op.h + */ +#define RXf_PMf_LOCALE 0x00000800 +#define RXf_PMf_MULTILINE 0x00001000 +#define RXf_PMf_SINGLELINE 0x00002000 +#define RXf_PMf_FOLD 0x00004000 +#define RXf_PMf_EXTENDED 0x00008000 +#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_LOCALE|RXf_PMf_FOLD|RXf_PMf_EXTENDED) + +/* What we have seen */ +/* one bit here */ +#define RXf_LOOKBEHIND_SEEN 0x00020000 +#define RXf_EVAL_SEEN 0x00040000 +#define RXf_CANY_SEEN 0x00080000 + +/* Special */ +#define RXf_NOSCAN 0x00100000 +#define RXf_CHECK_ALL 0x00200000 + +/* UTF8 related */ +#define RXf_UTF8 0x00400000 +#define RXf_MATCH_UTF8 0x00800000 + +/* Intuit related */ +#define RXf_USE_INTUIT_NOML 0x01000000 +#define RXf_USE_INTUIT_ML 0x02000000 +#define RXf_INTUIT_TAIL 0x04000000 +/* one bit here */ +#define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML) + +/* Copy and tainted info */ +#define RXf_COPY_DONE 0x10000000 +#define RXf_TAINTED_SEEN 0x20000000 +/* two bits here */ + + +#define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN) +#define RX_MATCH_TAINTED(prog) ((prog)->extflags & RXf_TAINTED_SEEN) +#define RX_MATCH_TAINTED_on(prog) ((prog)->extflags |= RXf_TAINTED_SEEN) +#define RX_MATCH_TAINTED_off(prog) 
((prog)->extflags &= ~RXf_TAINTED_SEEN) #define RX_MATCH_TAINTED_set(prog, t) ((t) \ ? RX_MATCH_TAINTED_on(prog) \ : RX_MATCH_TAINTED_off(prog)) -#define RX_MATCH_COPIED(prog) ((prog)->reganch & ROPT_COPY_DONE) -#define RX_MATCH_COPIED_on(prog) ((prog)->reganch |= ROPT_COPY_DONE) -#define RX_MATCH_COPIED_off(prog) ((prog)->reganch &= ~ROPT_COPY_DONE) +#define RX_MATCH_COPIED(prog) ((prog)->extflags & RXf_COPY_DONE) +#define RX_MATCH_COPIED_on(prog) ((prog)->extflags |= RXf_COPY_DONE) +#define RX_MATCH_COPIED_off(prog) ((prog)->extflags &= ~RXf_COPY_DONE) #define RX_MATCH_COPIED_set(prog,t) ((t) \ ? RX_MATCH_COPIED_on(prog) \ : RX_MATCH_COPIED_off(prog)) @@ -167,9 +219,9 @@ typedef struct regexp_engine { }} STMT_END #endif -#define RX_MATCH_UTF8(prog) ((prog)->reganch & ROPT_MATCH_UTF8) -#define RX_MATCH_UTF8_on(prog) ((prog)->reganch |= ROPT_MATCH_UTF8) -#define RX_MATCH_UTF8_off(prog) ((prog)->reganch &= ~ROPT_MATCH_UTF8) +#define RX_MATCH_UTF8(prog) ((prog)->extflags & RXf_MATCH_UTF8) +#define RX_MATCH_UTF8_on(prog) ((prog)->extflags |= RXf_MATCH_UTF8) +#define RX_MATCH_UTF8_off(prog) ((prog)->extflags &= ~RXf_MATCH_UTF8) #define RX_MATCH_UTF8_set(prog, t) ((t) \ ? (RX_MATCH_UTF8_on(prog), (PL_reg_match_utf8 = 1)) \ : (RX_MATCH_UTF8_off(prog), (PL_reg_match_utf8 = 0))) |