diff options
-rw-r--r-- | dump.c | 21 | ||||
-rw-r--r-- | ext/Devel-Peek/t/Peek.t | 2 | ||||
-rw-r--r-- | op.c | 13 | ||||
-rw-r--r-- | op.h | 2 | ||||
-rw-r--r-- | op_reg_common.h | 23 | ||||
-rw-r--r-- | pod/perlreapi.pod | 8 | ||||
-rw-r--r-- | pp.c | 7 | ||||
-rw-r--r-- | pp_ctl.c | 19 | ||||
-rw-r--r-- | pp_hot.c | 2 | ||||
-rw-r--r-- | regcomp.c | 30 | ||||
-rw-r--r-- | regexp.h | 35 | ||||
-rw-r--r-- | regnodes.h | 10 | ||||
-rw-r--r-- | t/op/split.t | 35 | ||||
-rw-r--r-- | t/re/recompile.t | 8 |
14 files changed, 151 insertions, 64 deletions
@@ -672,6 +672,8 @@ S_pm_description(pTHX_ const PMOP *pm) if (RX_EXTFLAGS(regex) & RXf_CHECK_ALL) sv_catpv(desc, ",ALL"); } + if (RX_EXTFLAGS(regex) & RXf_SKIPWHITE) + sv_catpv(desc, ",SKIPWHITE"); } append_flags(desc, pmflags, pmflags_flags_names); @@ -1449,7 +1451,7 @@ const struct flag_to_name regexp_flags_names[] = { {RXf_ANCH_GPOS, "ANCH_GPOS,"}, {RXf_GPOS_SEEN, "GPOS_SEEN,"}, {RXf_GPOS_FLOAT, "GPOS_FLOAT,"}, - {RXf_LOOKBEHIND_SEEN, "LOOKBEHIND_SEEN,"}, + {RXf_NO_INPLACE_SUBST, "NO_INPLACE_SUBST,"}, {RXf_EVAL_SEEN, "EVAL_SEEN,"}, {RXf_CANY_SEEN, "CANY_SEEN,"}, {RXf_NOSCAN, "NOSCAN,"}, @@ -1458,10 +1460,12 @@ const struct flag_to_name regexp_flags_names[] = { {RXf_USE_INTUIT_NOML, "USE_INTUIT_NOML,"}, {RXf_USE_INTUIT_ML, "USE_INTUIT_ML,"}, {RXf_INTUIT_TAIL, "INTUIT_TAIL,"}, + {RXf_SPLIT, "SPLIT,"}, {RXf_COPY_DONE, "COPY_DONE,"}, {RXf_TAINTED_SEEN, "TAINTED_SEEN,"}, {RXf_TAINTED, "TAINTED,"}, {RXf_START_ONLY, "START_ONLY,"}, + {RXf_SKIPWHITE, "SKIPWHITE,"}, {RXf_WHITE, "WHITE,"}, {RXf_NULL, "NULL,"}, }; @@ -2091,7 +2095,17 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo dumpregexp: { struct regexp * const r = ReANY((REGEXP*)sv); - flags = RX_EXTFLAGS((REGEXP*)sv); + flags = r->compflags; + sv_setpv(d,""); + append_flags(d, flags, regexp_flags_names); + if (*(SvEND(d) - 1) == ',') { + SvCUR_set(d, SvCUR(d) - 1); + SvPVX(d)[SvCUR(d)] = '\0'; + } + Perl_dump_indent(aTHX_ level, file, " COMPFLAGS = 0x%"UVxf" (%s)\n", + (UV)(r->compflags), SvPVX_const(d)); + + flags = r->extflags; sv_setpv(d,""); append_flags(d, flags, regexp_flags_names); if (*(SvEND(d) - 1) == ',') { @@ -2099,7 +2113,8 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo SvPVX(d)[SvCUR(d)] = '\0'; } Perl_dump_indent(aTHX_ level, file, " EXTFLAGS = 0x%"UVxf" (%s)\n", - (UV)flags, SvPVX_const(d)); + (UV)(r->extflags), SvPVX_const(d)); + Perl_dump_indent(aTHX_ level, file, " INTFLAGS = 0x%"UVxf"\n", (UV)(r->intflags)); Perl_dump_indent(aTHX_ level, file, " NPARENS = %"UVuf"\n", diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t index 1debcb55f5..3de46003f7 100644 --- a/ext/Devel-Peek/t/Peek.t +++ b/ext/Devel-Peek/t/Peek.t @@ -339,6 +339,7 @@ do_test('reference to regexp', STASH = $ADDR\\t"Regexp"' . ($] < 5.013 ? '' : ' + COMPFLAGS = 0x0 \(\) EXTFLAGS = 0x680000 \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\) INTFLAGS = 0x0 NPARENS = 0 @@ -943,6 +944,7 @@ do_test('UTF-8 in a regular expression', PV = $ADDR "\(\?\^u:\\\\\\\\x\{100\}\)" \[UTF8 "\(\?\^u:\\\\\\\\x\{100\}\)"\] CUR = 13 STASH = $ADDR "Regexp" + COMPFLAGS = 0x0 \(\) EXTFLAGS = 0x680040 \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\) INTFLAGS = 0x0 NPARENS = 0 @@ -4592,6 +4592,9 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) U32 rx_flags = pm->op_pmflags & RXf_PMf_COMPILETIME; regexp_engine const *eng = current_re_engine(); + if (o->op_flags & OPf_SPECIAL) + rx_flags |= RXf_SPLIT; + if (!has_code || !eng->op_comp) { /* compile-time simple constant pattern */ @@ -4668,6 +4671,9 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) pm->op_pmflags |= PMf_CODELIST_PRIVATE; } + if (o->op_flags & OPf_SPECIAL) + pm->op_pmflags |= PMf_SPLIT; + /* the OP_REGCMAYBE is a placeholder in the non-threaded case * to allow its op_next to be pointed past the regcomp and * preceding stacking ops; @@ -9755,15 +9761,10 @@ Perl_ck_split(pTHX_ OP *o) cLISTOPo->op_last = kid; /* There was only one element previously */ } - if (kid->op_type == OP_CONST && !(kid->op_private & OPpCONST_FOLDED)) { - SV * const sv = kSVOP->op_sv; - if (SvPOK(sv) && SvCUR(sv) == 1 && *SvPVX(sv) == ' ') - o->op_flags |= OPf_SPECIAL; - } if (kid->op_type != OP_MATCH || kid->op_flags & OPf_STACKED) { OP * const sibl = kid->op_sibling; kid->op_sibling = 0; - kid = pmruntime( newPMOP(OP_MATCH, 0), kid, 0, 0); + kid = pmruntime( newPMOP(OP_MATCH, OPf_SPECIAL), kid, 0, 0); /* OPf_SPECIAL is used to trigger split " " behavior */ if (cLISTOPo->op_first == cLISTOPo->op_last) cLISTOPo->op_last = kid; cLISTOPo->op_first = kid; @@ -115,7 +115,7 @@ Deprecated. Use C<GIMME_V> instead. /* On OP_ENTERSUB || OP_NULL, saw a "do". */ /* On OP_EXISTS, treat av as av, not avhv. */ /* On OP_(ENTER|LEAVE)EVAL, don't clear $@ */ - /* On OP_SPLIT, special split " " */ + /* On pushre, rx is used as part of split, e.g. split " " */ /* On regcomp, "use re 'eval'" was in scope */ /* On OP_READLINE, was <$filehandle> */ /* On RV2[ACGHS]V, don't create GV--in diff --git a/op_reg_common.h b/op_reg_common.h index eed483b75b..9dcdaaec63 100644 --- a/op_reg_common.h +++ b/op_reg_common.h @@ -27,6 +27,7 @@ * RXf_PMf_STD_PMMOD_SHIFT, followed by the p. See STD_PAT_MODS and * INT_PAT_MODS in regexp.h for the reason contiguity is needed */ /* Make sure to update lib/re.pm when changing these! */ +/* Make sure you keep the pure PMf_ versions below in sync */ #define RXf_PMf_MULTILINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+0)) /* /m */ #define RXf_PMf_SINGLELINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+1)) /* /s */ #define RXf_PMf_FOLD (1 << (RXf_PMf_STD_PMMOD_SHIFT+2)) /* /i */ @@ -79,13 +80,23 @@ get_regex_charset(const U32 flags) return (regex_charset) ((flags & RXf_PMf_CHARSET) >> _RXf_PMf_CHARSET_SHIFT); } +#define _RXf_PMf_SHIFT_COMPILETIME (RXf_PMf_STD_PMMOD_SHIFT+8) + +/* + Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will + be used by regex engines to check whether they should set + RXf_SKIPWHITE +*/ +#define RXf_PMf_SPLIT (1<<(RXf_PMf_STD_PMMOD_SHIFT+8)) + /* Next available bit after the above. Name begins with '_' so won't be * exported by B */ -#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+8) +#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+9) /* Mask of the above bits. These need to be transferred from op_pmflags to * re->extflags during compilation */ -#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_CHARSET|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY) +#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_CHARSET) +#define RXf_PMf_FLAGCOPYMASK (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_CHARSET|RXf_PMf_SPLIT) #if RXf_PMf_COMPILETIME > 255 # error RXf_PMf_COMPILETIME wont fit in U8 flags field of eval node @@ -97,18 +108,18 @@ get_regex_charset(const U32 flags) #define PMf_FOLD 1<<2 #define PMf_EXTENDED 1<<3 #define PMf_KEEPCOPY 1<<4 +#define PMf_CHARSET 7<<5 +#define PMf_SPLIT 1<<8 -#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY +#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY || PMf_SPLIT != RXf_PMf_SPLIT || PMf_CHARSET != RXf_PMf_CHARSET # error RXf_PMf defines are wrong #endif -#define PMf_COMPILETIME RXf_PMf_COMPILETIME - /* Error check that haven't left something out of this. This isn't done * directly in the #define because doing so confuses regcomp.pl. * (2**n - 1) is n 1 bits, so the below gets the contiguous bits between the * beginning and ending shifts */ -#if RXf_PMf_COMPILETIME != (((1 << (_RXf_PMf_SHIFT_NEXT))-1) \ +#if RXf_PMf_COMPILETIME != (((1 << (_RXf_PMf_SHIFT_COMPILETIME))-1) \ & (~((1 << RXf_PMf_STD_PMMOD_SHIFT)-1))) # error RXf_PMf_COMPILETIME is invalid #endif diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod index f1b6fdcfbf..eaaa1790d5 100644 --- a/pod/perlreapi.pod +++ b/pod/perlreapi.pod @@ -223,11 +223,13 @@ Perl's engine sets this flag on empty patterns, this optimization makes C<split //> much faster than it would otherwise be. It's even faster than C<unpack>. -=item RXf_MODIFIES_VARS +=item RXf_NO_INPLACE_SUBST Added in perl 5.18.0, this flag indicates that a regular expression might -assign to non-magical variables (such as $REGMARK and $REGERROR) during -matching. C<s///> will skip certain optimisations when this is set. +perform an operation that would interfere with inplace substituion. For +instance it might contain lookbehind, or assign to non-magical variables +(such as $REGMARK and $REGERROR) during matching. C<s///> will skip +certain optimisations when this is set. =back @@ -5313,7 +5313,6 @@ PP(pp_split) STRLEN len; const char *s = SvPV_const(sv, len); const bool do_utf8 = DO_UTF8(sv); - const bool skipwhite = PL_op->op_flags & OPf_SPECIAL; const char *strend = s + len; PMOP *pm; REGEXP *rx; @@ -5346,7 +5345,7 @@ PP(pp_split) rx = PM_GETRE(pm); TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET && - (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite)); + (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE))); RX_MATCH_UTF8_set(rx, do_utf8); @@ -5386,7 +5385,7 @@ PP(pp_split) } base = SP - PL_stack_base; orig = s; - if (skipwhite) { + if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) { if (do_utf8) { while (isSPACE_utf8(s)) s += UTF8SKIP(s); @@ -5408,7 +5407,7 @@ PP(pp_split) if (!limit) limit = maxiters + 2; - if (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite) { + if (RX_EXTFLAGS(rx) & RXf_WHITE) { while (--limit) { m = s; /* this one uses 'm' and is a negative test */ @@ -83,7 +83,7 @@ PP(pp_regcomp) REGEXP *re = NULL; REGEXP *new_re; const regexp_engine *eng; - bool is_bare_re; + bool is_bare_re= FALSE; if (PL_op->op_flags & OPf_STACKED) { dMARK; @@ -107,14 +107,27 @@ PP(pp_regcomp) assert (re != (REGEXP*) &PL_sv_undef); eng = re ? RX_ENGINE(re) : current_re_engine(); + /* + In the below logic: these are basically the same - check if this regcomp is part of a split. + + (PL_op->op_pmflags & PMf_split ) + (PL_op->op_next->op_type == OP_PUSHRE) + + We could add a new mask for this and copy the PMf_split, if we did + some bit definition fiddling first. + + For now we leave this + */ + new_re = (eng->op_comp ? eng->op_comp : &Perl_re_op_compile )(aTHX_ args, nargs, pm->op_code_list, eng, re, &is_bare_re, - (pm->op_pmflags & RXf_PMf_COMPILETIME), + (pm->op_pmflags & RXf_PMf_FLAGCOPYMASK), pm->op_pmflags | (PL_op->op_flags & OPf_SPECIAL ? PMf_USE_RE_EVAL : 0)); + if (pm->op_pmflags & PMf_HAS_CV) ReANY(new_re)->qr_anoncv = (CV*) SvREFCNT_inc(PAD_SV(PL_op->op_targ)); @@ -145,11 +158,13 @@ PP(pp_regcomp) ReREFCNT_dec(new_re); new_re = tmp; } + if (re != new_re) { ReREFCNT_dec(re); PM_SETRE(pm, new_re); } + #ifndef INCOMPLETE_TAINTS if (TAINTING_get && TAINT_get) { SvTAINTED_on((SV*)new_re); @@ -2308,7 +2308,7 @@ PP(pp_subst) #endif && (I32)clen <= RX_MINLENRET(rx) && (once || !(r_flags & REXEC_COPY_STR)) - && !(RX_EXTFLAGS(rx) & (RXf_LOOKBEHIND_SEEN|RXf_MODIFIES_VARS)) + && !(RX_EXTFLAGS(rx) & RXf_NO_INPLACE_SUBST) && (!doutf8 || SvUTF8(TARG)) && !(rpm->op_pmflags & PMf_NONDESTRUCT)) { @@ -5338,6 +5338,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, SV **svp; + DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, + "Compiling List of SVs %d elements%s\n",pat_count, orig_rx_flags & RXf_SPLIT ? " for split" : "")); /* apply magic and RE overloading to each arg */ for (svp = patternp; svp < patternp + pat_count; svp++) { SV *rx = *svp; @@ -5506,6 +5508,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, *is_bare_re = TRUE; SvREFCNT_inc(re); Safefree(pRExC_state->code_blocks); + DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, + "Precompiled pattern%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : "")); + return (REGEXP*)re; } } @@ -5518,6 +5523,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, bool is_code = 0; OP *o; + DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, + "Compiling OP_LIST%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : "")); + pat = newSVpvn("", 0); SAVEFREESV(pat); if (code_is_utf8) @@ -5548,6 +5556,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } else { assert(expr->op_type == OP_CONST); + DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log, + "Compiling OP_CONST%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : "")); pat = cSVOPx_sv(expr); } } @@ -5641,13 +5651,19 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } /* return old regex if pattern hasn't changed */ + /* XXX: note in the below we have to check the flags as well as the pattern. + * + * Things get a touch tricky as we have to compare the utf8 flag independently + * from the compile flags. + */ if ( old_re && !recompile - && !!RX_UTF8(old_re) == !!RExC_utf8 + && !!RX_UTF8(old_re) == !!RExC_utf8 + && ( RX_COMPFLAGS(old_re) == ( orig_rx_flags & RXf_PMf_FLAGCOPYMASK ) ) && RX_PRECOMP(old_re) && RX_PRELEN(old_re) == plen - && memEQ(RX_PRECOMP(old_re), exp, plen)) + && memEQ(RX_PRECOMP(old_re), exp, plen)) { /* with runtime code, always recompile */ runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags, @@ -5799,6 +5815,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, RXi_SET( r, ri ); r->engine= eng; r->extflags = rx_flags; + RXp_COMPFLAGS(r) = orig_rx_flags & RXf_PMf_FLAGCOPYMASK; + if (pm_flags & PMf_IS_QR) { ri->code_blocks = pRExC_state->code_blocks; ri->num_code_blocks = pRExC_state->num_code_blocks; @@ -6302,7 +6320,7 @@ reStudy: if (RExC_seen & REG_SEEN_GPOS) r->extflags |= RXf_GPOS_SEEN; if (RExC_seen & REG_SEEN_LOOKBEHIND) - r->extflags |= RXf_LOOKBEHIND_SEEN; + r->extflags |= RXf_NO_INPLACE_SUBST; /* inplace might break the lookbehind */ if (pRExC_state->num_code_blocks) r->extflags |= RXf_EVAL_SEEN; if (RExC_seen & REG_SEEN_CANY) @@ -6310,7 +6328,7 @@ reStudy: if (RExC_seen & REG_SEEN_VERBARG) { r->intflags |= PREGf_VERBARG_SEEN; - r->extflags |= RXf_MODIFIES_VARS; + r->extflags |= RXf_NO_INPLACE_SUBST; /* don't understand this! Yves */ } if (RExC_seen & REG_SEEN_CUTGROUP) r->intflags |= PREGf_CUTGROUP_SEEN; @@ -6327,13 +6345,15 @@ reStudy: regnode *next = NEXTOPER(first); U8 nop = OP(next); - if (PL_regkind[fop] == NOTHING && nop == END) r->extflags |= RXf_NULL; else if (PL_regkind[fop] == BOL && nop == END) r->extflags |= RXf_START_ONLY; else if (fop == PLUS && PL_regkind[nop] == POSIXD && FLAGS(next) == _CC_SPACE && OP(regnext(first)) == END) r->extflags |= RXf_WHITE; + else if ( r->extflags & RXf_SPLIT && fop == EXACT && STR_LEN(first) == 1 && *(STRING(first)) == ' ' && OP(regnext(first)) == END ) + r->extflags |= (RXf_SKIPWHITE|RXf_WHITE); + } #ifdef DEBUGGING if (RExC_paren_names) { @@ -130,6 +130,9 @@ struct reg_code_block { /* Information about the match that isn't often used */ \ /* offset from wrapped to the start of precomp */ \ PERL_BITFIELD32 pre_prefix:4; \ + /* original flags used to compile the pattern, may differ */ \ + /* from extflags in various ways */ \ + PERL_BITFIELD32 compflags:9; \ CV *qr_anoncv /* the anon sub wrapped round qr/(?{..})/ */ typedef struct regexp { @@ -333,7 +336,17 @@ and check for NULL. /* Leave some space, so future bit allocations can go either in the shared or * unshared area without affecting binary compatibility */ -#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+1) +#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT) + +/* + Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will + be used by regex engines to check whether they should set + RXf_SKIPWHITE +*/ +#define RXf_SPLIT (1<<(RXf_BASE_SHIFT-1)) +#if RXf_SPLIT != RXf_PMf_SPLIT +# error "RXf_SPLIT does not match RXf_PMf_SPLIT" +#endif /* Manually decorate this function with gcc-style attributes just to * avoid having to restructure the header files and their called order, @@ -366,19 +379,6 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) } } -/* - Two flags no longer used. - RXf_SPLIT used to be set in Perl_pmruntime if op_flags & OPf_SPECIAL, - i.e., split. It was used by the regex engine to check whether it should - set RXf_SKIPWHITE. Regexp plugins on CPAN also have done the same thing - historically, so we leave these flags defined. -*/ -#ifndef PERL_CORE -# define RXf_SPLIT 0 -# define RXf_SKIPWHITE 0 -#endif - - /* Anchor and GPOS related stuff */ #define RXf_ANCH_BOL (1<<(RXf_BASE_SHIFT+0)) #define RXf_ANCH_MBOL (1<<(RXf_BASE_SHIFT+1)) @@ -392,7 +392,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS) /* What we have seen */ -#define RXf_LOOKBEHIND_SEEN (1<<(RXf_BASE_SHIFT+6)) +#define RXf_NO_INPLACE_SUBST (1<<(RXf_BASE_SHIFT+6)) #define RXf_EVAL_SEEN (1<<(RXf_BASE_SHIFT+7)) #define RXf_CANY_SEEN (1<<(RXf_BASE_SHIFT+8)) @@ -409,8 +409,6 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #define RXf_INTUIT_TAIL (1<<(RXf_BASE_SHIFT+14)) #define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML) -#define RXf_MODIFIES_VARS (1<<(RXf_BASE_SHIFT+15)) - /* Copy and tainted info */ #define RXf_COPY_DONE (1<<(RXf_BASE_SHIFT+16)) @@ -422,6 +420,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) /* Flags indicating special patterns */ #define RXf_START_ONLY (1<<(RXf_BASE_SHIFT+19)) /* Pattern is /^/ */ +#define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split " " */ #define RXf_WHITE (1<<(RXf_BASE_SHIFT+21)) /* Pattern is /\s+/ */ #define RXf_NULL (1U<<(RXf_BASE_SHIFT+22)) /* Pattern is // */ #if RXf_BASE_SHIFT+22 > 31 @@ -468,6 +467,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) : RX_MATCH_COPIED_off(prog)) #define RXp_EXTFLAGS(rx) ((rx)->extflags) +#define RXp_COMPFLAGS(rx) ((rx)->compflags) /* For source compatibility. We used to store these explicitly. */ #define RX_PRECOMP(prog) (RX_WRAPPED(prog) + ReANY(prog)->pre_prefix) @@ -482,6 +482,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #define RX_CHECK_SUBSTR(prog) (ReANY(prog)->check_substr) #define RX_REFCNT(prog) SvREFCNT(prog) #define RX_EXTFLAGS(prog) RXp_EXTFLAGS(ReANY(prog)) +#define RX_COMPFLAGS(prog) RXp_COMPFLAGS(ReANY(prog)) #define RX_ENGINE(prog) (ReANY(prog)->engine) #define RX_SUBBEG(prog) (ReANY(prog)->subbeg) #define RX_SUBOFFSET(prog) (ReANY(prog)->suboffset) diff --git a/regnodes.h b/regnodes.h index fd2102e9ec..0caf86dd55 100644 --- a/regnodes.h +++ b/regnodes.h @@ -650,7 +650,7 @@ EXTCONST char * const PL_reg_name[] = { EXTCONST char * PL_reg_extflags_name[]; #else EXTCONST char * const PL_reg_extflags_name[] = { - /* Bits in extflags defined: 11011111111111111111111011111111 */ + /* Bits in extflags defined: 11111110111111111111111111111111 */ "MULTILINE", /* 0x00000001 */ "SINGLELINE", /* 0x00000002 */ "FOLD", /* 0x00000004 */ @@ -659,14 +659,14 @@ EXTCONST char * const PL_reg_extflags_name[] = { "CHARSET0", /* 0x00000020 : "CHARSET" - 0x000000e0 */ "CHARSET1", /* 0x00000040 : "CHARSET" - 0x000000e0 */ "CHARSET2", /* 0x00000080 : "CHARSET" - 0x000000e0 */ - "UNUSED_BIT_8", /* 0x00000100 */ + "SPLIT", /* 0x00000100 */ "ANCH_BOL", /* 0x00000200 */ "ANCH_MBOL", /* 0x00000400 */ "ANCH_SBOL", /* 0x00000800 */ "ANCH_GPOS", /* 0x00001000 */ "GPOS_SEEN", /* 0x00002000 */ "GPOS_FLOAT", /* 0x00004000 */ - "LOOKBEHIND_SEEN", /* 0x00008000 */ + "NO_INPLACE_SUBST", /* 0x00008000 */ "EVAL_SEEN", /* 0x00010000 */ "CANY_SEEN", /* 0x00020000 */ "NOSCAN", /* 0x00040000 */ @@ -675,12 +675,12 @@ EXTCONST char * const PL_reg_extflags_name[] = { "USE_INTUIT_NOML", /* 0x00200000 */ "USE_INTUIT_ML", /* 0x00400000 */ "INTUIT_TAIL", /* 0x00800000 */ - "MODIFIES_VARS", /* 0x01000000 */ + "UNUSED_BIT_24", /* 0x01000000 */ "COPY_DONE", /* 0x02000000 */ "TAINTED_SEEN", /* 0x04000000 */ "TAINTED", /* 0x08000000 */ "START_ONLY", /* 0x10000000 */ - "UNUSED_BIT_29", /* 0x20000000 */ + "SKIPWHITE", /* 0x20000000 */ "WHITE", /* 0x40000000 */ "NULL", /* 0x80000000 */ }; diff --git a/t/op/split.t b/t/op/split.t index c198737d3e..5e011595e2 100644 --- a/t/op/split.t +++ b/t/op/split.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 115; +plan tests => 118; $FS = ':'; @@ -418,16 +418,22 @@ is($cnt, scalar(@ary)); is "@PATH", "Font GlyphNames", "hybrid scalar-and-array context"; } -# [perl #94490] constant folding should not invoke special split " " -# behaviour. -@_=split(0||" ","foo bar"); -is @_, 3, 'split(0||" ") is not treated like split(" ")'; #' - { my @results; - my $expr; - $expr = ' a b c '; + my $expr= "foo bar"; + my $cond; + + @results= split(0||" ", $expr); + is @results, 2, 'split(0||" ") is treated like split(" ")'; #' + + $cond= 0; + @results= split $cond ? " " : qr/ /, $expr; + is @results, 3, 'split($cond ? " " : qr/ /, $expr) works as expected (like qr/ /)'; + $cond= 1; + @results= split $cond ? " " : qr/ /, $expr; + is @results, 2, 'split($cond ? " " : qr/ /, $expr) works as expected (like " ")'; + $expr = ' a b c '; @results = split /\s/, $expr; is @results, 4, "split on regex of single space metacharacter: captured 4 elements"; @@ -452,10 +458,19 @@ is @_, 3, 'split(0||" ") is not treated like split(" ")'; #' "split on string of single whitespace: captured 3 elements"; is $results[0], 'a', "split on string of single whitespace: first element is non-empty; multiple contiguous space characters"; + + my @seq; + for my $cond (0,1,0,1,0) { + $expr = " foo "; + @results = split $cond ? qr/ / : " ", $expr; + push @seq, scalar(@results) . ":" . $results[-1]; + } + is join(" ", @seq), "1:foo 3:foo 1:foo 3:foo 1:foo", + qq{split(\$cond ? qr/ / : " ", "$exp") behaves as expected over repeated similar patterns}; } -TODO: { - local $::TODO = 'RT #116086: split "\x20" does not work as documented'; +{ + # 'RT #116086: split "\x20" does not work as documented'; my @results; my $expr; $expr = ' a b c '; diff --git a/t/re/recompile.t b/t/re/recompile.t index ad00df8320..63a70684be 100644 --- a/t/re/recompile.t +++ b/t/re/recompile.t @@ -22,7 +22,7 @@ BEGIN { } -plan tests => 36; +plan tests => 38; my $results = runperl( switches => [ '-Dr' ], @@ -187,3 +187,9 @@ my $y = '(?{1})'; BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'" "a" =~ qr/a$x$_/ for $y, $y, $y; CODE + +comp_n(6, <<'CODE', 'embedded code qr'); +my $x = qr/a/i; +my $y = qr/a/; +"a" =~ qr/a$_/ for $x, $y, $x, $y; +CODE |