summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dump.c21
-rw-r--r--ext/Devel-Peek/t/Peek.t2
-rw-r--r--op.c13
-rw-r--r--op.h2
-rw-r--r--op_reg_common.h23
-rw-r--r--pod/perlreapi.pod8
-rw-r--r--pp.c7
-rw-r--r--pp_ctl.c19
-rw-r--r--pp_hot.c2
-rw-r--r--regcomp.c30
-rw-r--r--regexp.h35
-rw-r--r--regnodes.h10
-rw-r--r--t/op/split.t35
-rw-r--r--t/re/recompile.t8
14 files changed, 151 insertions, 64 deletions
diff --git a/dump.c b/dump.c
index dd0e30577a..eaf6674ada 100644
--- a/dump.c
+++ b/dump.c
@@ -672,6 +672,8 @@ S_pm_description(pTHX_ const PMOP *pm)
if (RX_EXTFLAGS(regex) & RXf_CHECK_ALL)
sv_catpv(desc, ",ALL");
}
+ if (RX_EXTFLAGS(regex) & RXf_SKIPWHITE)
+ sv_catpv(desc, ",SKIPWHITE");
}
append_flags(desc, pmflags, pmflags_flags_names);
@@ -1449,7 +1451,7 @@ const struct flag_to_name regexp_flags_names[] = {
{RXf_ANCH_GPOS, "ANCH_GPOS,"},
{RXf_GPOS_SEEN, "GPOS_SEEN,"},
{RXf_GPOS_FLOAT, "GPOS_FLOAT,"},
- {RXf_LOOKBEHIND_SEEN, "LOOKBEHIND_SEEN,"},
+ {RXf_NO_INPLACE_SUBST, "NO_INPLACE_SUBST,"},
{RXf_EVAL_SEEN, "EVAL_SEEN,"},
{RXf_CANY_SEEN, "CANY_SEEN,"},
{RXf_NOSCAN, "NOSCAN,"},
@@ -1458,10 +1460,12 @@ const struct flag_to_name regexp_flags_names[] = {
{RXf_USE_INTUIT_NOML, "USE_INTUIT_NOML,"},
{RXf_USE_INTUIT_ML, "USE_INTUIT_ML,"},
{RXf_INTUIT_TAIL, "INTUIT_TAIL,"},
+ {RXf_SPLIT, "SPLIT,"},
{RXf_COPY_DONE, "COPY_DONE,"},
{RXf_TAINTED_SEEN, "TAINTED_SEEN,"},
{RXf_TAINTED, "TAINTED,"},
{RXf_START_ONLY, "START_ONLY,"},
+ {RXf_SKIPWHITE, "SKIPWHITE,"},
{RXf_WHITE, "WHITE,"},
{RXf_NULL, "NULL,"},
};
@@ -2091,7 +2095,17 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
dumpregexp:
{
struct regexp * const r = ReANY((REGEXP*)sv);
- flags = RX_EXTFLAGS((REGEXP*)sv);
+ flags = r->compflags;
+ sv_setpv(d,"");
+ append_flags(d, flags, regexp_flags_names);
+ if (*(SvEND(d) - 1) == ',') {
+ SvCUR_set(d, SvCUR(d) - 1);
+ SvPVX(d)[SvCUR(d)] = '\0';
+ }
+ Perl_dump_indent(aTHX_ level, file, " COMPFLAGS = 0x%"UVxf" (%s)\n",
+ (UV)(r->compflags), SvPVX_const(d));
+
+ flags = r->extflags;
sv_setpv(d,"");
append_flags(d, flags, regexp_flags_names);
if (*(SvEND(d) - 1) == ',') {
@@ -2099,7 +2113,8 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, bo
SvPVX(d)[SvCUR(d)] = '\0';
}
Perl_dump_indent(aTHX_ level, file, " EXTFLAGS = 0x%"UVxf" (%s)\n",
- (UV)flags, SvPVX_const(d));
+ (UV)(r->extflags), SvPVX_const(d));
+
Perl_dump_indent(aTHX_ level, file, " INTFLAGS = 0x%"UVxf"\n",
(UV)(r->intflags));
Perl_dump_indent(aTHX_ level, file, " NPARENS = %"UVuf"\n",
diff --git a/ext/Devel-Peek/t/Peek.t b/ext/Devel-Peek/t/Peek.t
index 1debcb55f5..3de46003f7 100644
--- a/ext/Devel-Peek/t/Peek.t
+++ b/ext/Devel-Peek/t/Peek.t
@@ -339,6 +339,7 @@ do_test('reference to regexp',
STASH = $ADDR\\t"Regexp"'
. ($] < 5.013 ? '' :
'
+ COMPFLAGS = 0x0 \(\)
EXTFLAGS = 0x680000 \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\)
INTFLAGS = 0x0
NPARENS = 0
@@ -943,6 +944,7 @@ do_test('UTF-8 in a regular expression',
PV = $ADDR "\(\?\^u:\\\\\\\\x\{100\}\)" \[UTF8 "\(\?\^u:\\\\\\\\x\{100\}\)"\]
CUR = 13
STASH = $ADDR "Regexp"
+ COMPFLAGS = 0x0 \(\)
EXTFLAGS = 0x680040 \(CHECK_ALL,USE_INTUIT_NOML,USE_INTUIT_ML\)
INTFLAGS = 0x0
NPARENS = 0
diff --git a/op.c b/op.c
index 2b83188b20..c502d3fe0d 100644
--- a/op.c
+++ b/op.c
@@ -4592,6 +4592,9 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
U32 rx_flags = pm->op_pmflags & RXf_PMf_COMPILETIME;
regexp_engine const *eng = current_re_engine();
+ if (o->op_flags & OPf_SPECIAL)
+ rx_flags |= RXf_SPLIT;
+
if (!has_code || !eng->op_comp) {
/* compile-time simple constant pattern */
@@ -4668,6 +4671,9 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
pm->op_pmflags |= PMf_CODELIST_PRIVATE;
}
+ if (o->op_flags & OPf_SPECIAL)
+ pm->op_pmflags |= PMf_SPLIT;
+
/* the OP_REGCMAYBE is a placeholder in the non-threaded case
* to allow its op_next to be pointed past the regcomp and
* preceding stacking ops;
@@ -9755,15 +9761,10 @@ Perl_ck_split(pTHX_ OP *o)
cLISTOPo->op_last = kid; /* There was only one element previously */
}
- if (kid->op_type == OP_CONST && !(kid->op_private & OPpCONST_FOLDED)) {
- SV * const sv = kSVOP->op_sv;
- if (SvPOK(sv) && SvCUR(sv) == 1 && *SvPVX(sv) == ' ')
- o->op_flags |= OPf_SPECIAL;
- }
if (kid->op_type != OP_MATCH || kid->op_flags & OPf_STACKED) {
OP * const sibl = kid->op_sibling;
kid->op_sibling = 0;
- kid = pmruntime( newPMOP(OP_MATCH, 0), kid, 0, 0);
+ kid = pmruntime( newPMOP(OP_MATCH, OPf_SPECIAL), kid, 0, 0); /* OPf_SPECIAL is used to trigger split " " behavior */
if (cLISTOPo->op_first == cLISTOPo->op_last)
cLISTOPo->op_last = kid;
cLISTOPo->op_first = kid;
diff --git a/op.h b/op.h
index 88703da5c8..8b87a9c14d 100644
--- a/op.h
+++ b/op.h
@@ -115,7 +115,7 @@ Deprecated. Use C<GIMME_V> instead.
/* On OP_ENTERSUB || OP_NULL, saw a "do". */
/* On OP_EXISTS, treat av as av, not avhv. */
/* On OP_(ENTER|LEAVE)EVAL, don't clear $@ */
- /* On OP_SPLIT, special split " " */
+ /* On pushre, rx is used as part of split, e.g. split " " */
/* On regcomp, "use re 'eval'" was in scope */
/* On OP_READLINE, was <$filehandle> */
/* On RV2[ACGHS]V, don't create GV--in
diff --git a/op_reg_common.h b/op_reg_common.h
index eed483b75b..9dcdaaec63 100644
--- a/op_reg_common.h
+++ b/op_reg_common.h
@@ -27,6 +27,7 @@
* RXf_PMf_STD_PMMOD_SHIFT, followed by the p. See STD_PAT_MODS and
* INT_PAT_MODS in regexp.h for the reason contiguity is needed */
/* Make sure to update lib/re.pm when changing these! */
+/* Make sure you keep the pure PMf_ versions below in sync */
#define RXf_PMf_MULTILINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+0)) /* /m */
#define RXf_PMf_SINGLELINE (1 << (RXf_PMf_STD_PMMOD_SHIFT+1)) /* /s */
#define RXf_PMf_FOLD (1 << (RXf_PMf_STD_PMMOD_SHIFT+2)) /* /i */
@@ -79,13 +80,23 @@ get_regex_charset(const U32 flags)
return (regex_charset) ((flags & RXf_PMf_CHARSET) >> _RXf_PMf_CHARSET_SHIFT);
}
+#define _RXf_PMf_SHIFT_COMPILETIME (RXf_PMf_STD_PMMOD_SHIFT+8)
+
+/*
+ Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
+ be used by regex engines to check whether they should set
+ RXf_SKIPWHITE
+*/
+#define RXf_PMf_SPLIT (1<<(RXf_PMf_STD_PMMOD_SHIFT+8))
+
/* Next available bit after the above. Name begins with '_' so won't be
* exported by B */
-#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+8)
+#define _RXf_PMf_SHIFT_NEXT (RXf_PMf_STD_PMMOD_SHIFT+9)
/* Mask of the above bits. These need to be transferred from op_pmflags to
* re->extflags during compilation */
-#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_CHARSET|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY)
+#define RXf_PMf_COMPILETIME (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_CHARSET)
+#define RXf_PMf_FLAGCOPYMASK (RXf_PMf_MULTILINE|RXf_PMf_SINGLELINE|RXf_PMf_FOLD|RXf_PMf_EXTENDED|RXf_PMf_KEEPCOPY|RXf_PMf_CHARSET|RXf_PMf_SPLIT)
#if RXf_PMf_COMPILETIME > 255
# error RXf_PMf_COMPILETIME wont fit in U8 flags field of eval node
@@ -97,18 +108,18 @@ get_regex_charset(const U32 flags)
#define PMf_FOLD 1<<2
#define PMf_EXTENDED 1<<3
#define PMf_KEEPCOPY 1<<4
+#define PMf_CHARSET 7<<5
+#define PMf_SPLIT 1<<8
-#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY
+#if PMf_MULTILINE != RXf_PMf_MULTILINE || PMf_SINGLELINE != RXf_PMf_SINGLELINE || PMf_FOLD != RXf_PMf_FOLD || PMf_EXTENDED != RXf_PMf_EXTENDED || PMf_KEEPCOPY != RXf_PMf_KEEPCOPY || PMf_SPLIT != RXf_PMf_SPLIT || PMf_CHARSET != RXf_PMf_CHARSET
# error RXf_PMf defines are wrong
#endif
-#define PMf_COMPILETIME RXf_PMf_COMPILETIME
-
/* Error check that haven't left something out of this. This isn't done
* directly in the #define because doing so confuses regcomp.pl.
* (2**n - 1) is n 1 bits, so the below gets the contiguous bits between the
* beginning and ending shifts */
-#if RXf_PMf_COMPILETIME != (((1 << (_RXf_PMf_SHIFT_NEXT))-1) \
+#if RXf_PMf_COMPILETIME != (((1 << (_RXf_PMf_SHIFT_COMPILETIME))-1) \
& (~((1 << RXf_PMf_STD_PMMOD_SHIFT)-1)))
# error RXf_PMf_COMPILETIME is invalid
#endif
diff --git a/pod/perlreapi.pod b/pod/perlreapi.pod
index f1b6fdcfbf..eaaa1790d5 100644
--- a/pod/perlreapi.pod
+++ b/pod/perlreapi.pod
@@ -223,11 +223,13 @@ Perl's engine sets this flag on empty patterns, this optimization
makes C<split //> much faster than it would otherwise be. It's even
faster than C<unpack>.
-=item RXf_MODIFIES_VARS
+=item RXf_NO_INPLACE_SUBST
Added in perl 5.18.0, this flag indicates that a regular expression might
-assign to non-magical variables (such as $REGMARK and $REGERROR) during
-matching. C<s///> will skip certain optimisations when this is set.
+perform an operation that would interfere with inplace substituion. For
+instance it might contain lookbehind, or assign to non-magical variables
+(such as $REGMARK and $REGERROR) during matching. C<s///> will skip
+certain optimisations when this is set.
=back
diff --git a/pp.c b/pp.c
index 14ba91d75e..6da9970356 100644
--- a/pp.c
+++ b/pp.c
@@ -5313,7 +5313,6 @@ PP(pp_split)
STRLEN len;
const char *s = SvPV_const(sv, len);
const bool do_utf8 = DO_UTF8(sv);
- const bool skipwhite = PL_op->op_flags & OPf_SPECIAL;
const char *strend = s + len;
PMOP *pm;
REGEXP *rx;
@@ -5346,7 +5345,7 @@ PP(pp_split)
rx = PM_GETRE(pm);
TAINT_IF(get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET &&
- (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite));
+ (RX_EXTFLAGS(rx) & (RXf_WHITE | RXf_SKIPWHITE)));
RX_MATCH_UTF8_set(rx, do_utf8);
@@ -5386,7 +5385,7 @@ PP(pp_split)
}
base = SP - PL_stack_base;
orig = s;
- if (skipwhite) {
+ if (RX_EXTFLAGS(rx) & RXf_SKIPWHITE) {
if (do_utf8) {
while (isSPACE_utf8(s))
s += UTF8SKIP(s);
@@ -5408,7 +5407,7 @@ PP(pp_split)
if (!limit)
limit = maxiters + 2;
- if (RX_EXTFLAGS(rx) & RXf_WHITE || skipwhite) {
+ if (RX_EXTFLAGS(rx) & RXf_WHITE) {
while (--limit) {
m = s;
/* this one uses 'm' and is a negative test */
diff --git a/pp_ctl.c b/pp_ctl.c
index c9d833f62c..f518bc2c95 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -83,7 +83,7 @@ PP(pp_regcomp)
REGEXP *re = NULL;
REGEXP *new_re;
const regexp_engine *eng;
- bool is_bare_re;
+ bool is_bare_re= FALSE;
if (PL_op->op_flags & OPf_STACKED) {
dMARK;
@@ -107,14 +107,27 @@ PP(pp_regcomp)
assert (re != (REGEXP*) &PL_sv_undef);
eng = re ? RX_ENGINE(re) : current_re_engine();
+ /*
+ In the below logic: these are basically the same - check if this regcomp is part of a split.
+
+ (PL_op->op_pmflags & PMf_split )
+ (PL_op->op_next->op_type == OP_PUSHRE)
+
+ We could add a new mask for this and copy the PMf_split, if we did
+ some bit definition fiddling first.
+
+ For now we leave this
+ */
+
new_re = (eng->op_comp
? eng->op_comp
: &Perl_re_op_compile
)(aTHX_ args, nargs, pm->op_code_list, eng, re,
&is_bare_re,
- (pm->op_pmflags & RXf_PMf_COMPILETIME),
+ (pm->op_pmflags & RXf_PMf_FLAGCOPYMASK),
pm->op_pmflags |
(PL_op->op_flags & OPf_SPECIAL ? PMf_USE_RE_EVAL : 0));
+
if (pm->op_pmflags & PMf_HAS_CV)
ReANY(new_re)->qr_anoncv
= (CV*) SvREFCNT_inc(PAD_SV(PL_op->op_targ));
@@ -145,11 +158,13 @@ PP(pp_regcomp)
ReREFCNT_dec(new_re);
new_re = tmp;
}
+
if (re != new_re) {
ReREFCNT_dec(re);
PM_SETRE(pm, new_re);
}
+
#ifndef INCOMPLETE_TAINTS
if (TAINTING_get && TAINT_get) {
SvTAINTED_on((SV*)new_re);
diff --git a/pp_hot.c b/pp_hot.c
index 8871593484..7eb0c61b92 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2308,7 +2308,7 @@ PP(pp_subst)
#endif
&& (I32)clen <= RX_MINLENRET(rx)
&& (once || !(r_flags & REXEC_COPY_STR))
- && !(RX_EXTFLAGS(rx) & (RXf_LOOKBEHIND_SEEN|RXf_MODIFIES_VARS))
+ && !(RX_EXTFLAGS(rx) & RXf_NO_INPLACE_SUBST)
&& (!doutf8 || SvUTF8(TARG))
&& !(rpm->op_pmflags & PMf_NONDESTRUCT))
{
diff --git a/regcomp.c b/regcomp.c
index 5a602c4f43..34a4e9f9a6 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -5338,6 +5338,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
SV **svp;
+ DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+ "Compiling List of SVs %d elements%s\n",pat_count, orig_rx_flags & RXf_SPLIT ? " for split" : ""));
/* apply magic and RE overloading to each arg */
for (svp = patternp; svp < patternp + pat_count; svp++) {
SV *rx = *svp;
@@ -5506,6 +5508,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
*is_bare_re = TRUE;
SvREFCNT_inc(re);
Safefree(pRExC_state->code_blocks);
+ DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+ "Precompiled pattern%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : ""));
+
return (REGEXP*)re;
}
}
@@ -5518,6 +5523,9 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
bool is_code = 0;
OP *o;
+ DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+ "Compiling OP_LIST%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : ""));
+
pat = newSVpvn("", 0);
SAVEFREESV(pat);
if (code_is_utf8)
@@ -5548,6 +5556,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
else {
assert(expr->op_type == OP_CONST);
+ DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
+ "Compiling OP_CONST%s\n", orig_rx_flags & RXf_SPLIT ? " for split" : ""));
pat = cSVOPx_sv(expr);
}
}
@@ -5641,13 +5651,19 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
/* return old regex if pattern hasn't changed */
+ /* XXX: note in the below we have to check the flags as well as the pattern.
+ *
+ * Things get a touch tricky as we have to compare the utf8 flag independently
+ * from the compile flags.
+ */
if ( old_re
&& !recompile
- && !!RX_UTF8(old_re) == !!RExC_utf8
+ && !!RX_UTF8(old_re) == !!RExC_utf8
+ && ( RX_COMPFLAGS(old_re) == ( orig_rx_flags & RXf_PMf_FLAGCOPYMASK ) )
&& RX_PRECOMP(old_re)
&& RX_PRELEN(old_re) == plen
- && memEQ(RX_PRECOMP(old_re), exp, plen))
+ && memEQ(RX_PRECOMP(old_re), exp, plen))
{
/* with runtime code, always recompile */
runtime_code = S_has_runtime_code(aTHX_ pRExC_state, expr, pm_flags,
@@ -5799,6 +5815,8 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
RXi_SET( r, ri );
r->engine= eng;
r->extflags = rx_flags;
+ RXp_COMPFLAGS(r) = orig_rx_flags & RXf_PMf_FLAGCOPYMASK;
+
if (pm_flags & PMf_IS_QR) {
ri->code_blocks = pRExC_state->code_blocks;
ri->num_code_blocks = pRExC_state->num_code_blocks;
@@ -6302,7 +6320,7 @@ reStudy:
if (RExC_seen & REG_SEEN_GPOS)
r->extflags |= RXf_GPOS_SEEN;
if (RExC_seen & REG_SEEN_LOOKBEHIND)
- r->extflags |= RXf_LOOKBEHIND_SEEN;
+ r->extflags |= RXf_NO_INPLACE_SUBST; /* inplace might break the lookbehind */
if (pRExC_state->num_code_blocks)
r->extflags |= RXf_EVAL_SEEN;
if (RExC_seen & REG_SEEN_CANY)
@@ -6310,7 +6328,7 @@ reStudy:
if (RExC_seen & REG_SEEN_VERBARG)
{
r->intflags |= PREGf_VERBARG_SEEN;
- r->extflags |= RXf_MODIFIES_VARS;
+ r->extflags |= RXf_NO_INPLACE_SUBST; /* don't understand this! Yves */
}
if (RExC_seen & REG_SEEN_CUTGROUP)
r->intflags |= PREGf_CUTGROUP_SEEN;
@@ -6327,13 +6345,15 @@ reStudy:
regnode *next = NEXTOPER(first);
U8 nop = OP(next);
-
if (PL_regkind[fop] == NOTHING && nop == END)
r->extflags |= RXf_NULL;
else if (PL_regkind[fop] == BOL && nop == END)
r->extflags |= RXf_START_ONLY;
else if (fop == PLUS && PL_regkind[nop] == POSIXD && FLAGS(next) == _CC_SPACE && OP(regnext(first)) == END)
r->extflags |= RXf_WHITE;
+ else if ( r->extflags & RXf_SPLIT && fop == EXACT && STR_LEN(first) == 1 && *(STRING(first)) == ' ' && OP(regnext(first)) == END )
+ r->extflags |= (RXf_SKIPWHITE|RXf_WHITE);
+
}
#ifdef DEBUGGING
if (RExC_paren_names) {
diff --git a/regexp.h b/regexp.h
index 51630e42ea..6b16d14593 100644
--- a/regexp.h
+++ b/regexp.h
@@ -130,6 +130,9 @@ struct reg_code_block {
/* Information about the match that isn't often used */ \
/* offset from wrapped to the start of precomp */ \
PERL_BITFIELD32 pre_prefix:4; \
+ /* original flags used to compile the pattern, may differ */ \
+ /* from extflags in various ways */ \
+ PERL_BITFIELD32 compflags:9; \
CV *qr_anoncv /* the anon sub wrapped round qr/(?{..})/ */
typedef struct regexp {
@@ -333,7 +336,17 @@ and check for NULL.
/* Leave some space, so future bit allocations can go either in the shared or
* unshared area without affecting binary compatibility */
-#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT+1)
+#define RXf_BASE_SHIFT (_RXf_PMf_SHIFT_NEXT)
+
+/*
+ Set in Perl_pmruntime if op_flags & OPf_SPECIAL, i.e. split. Will
+ be used by regex engines to check whether they should set
+ RXf_SKIPWHITE
+*/
+#define RXf_SPLIT (1<<(RXf_BASE_SHIFT-1))
+#if RXf_SPLIT != RXf_PMf_SPLIT
+# error "RXf_SPLIT does not match RXf_PMf_SPLIT"
+#endif
/* Manually decorate this function with gcc-style attributes just to
* avoid having to restructure the header files and their called order,
@@ -366,19 +379,6 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
}
}
-/*
- Two flags no longer used.
- RXf_SPLIT used to be set in Perl_pmruntime if op_flags & OPf_SPECIAL,
- i.e., split. It was used by the regex engine to check whether it should
- set RXf_SKIPWHITE. Regexp plugins on CPAN also have done the same thing
- historically, so we leave these flags defined.
-*/
-#ifndef PERL_CORE
-# define RXf_SPLIT 0
-# define RXf_SKIPWHITE 0
-#endif
-
-
/* Anchor and GPOS related stuff */
#define RXf_ANCH_BOL (1<<(RXf_BASE_SHIFT+0))
#define RXf_ANCH_MBOL (1<<(RXf_BASE_SHIFT+1))
@@ -392,7 +392,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RXf_ANCH_SINGLE (RXf_ANCH_SBOL|RXf_ANCH_GPOS)
/* What we have seen */
-#define RXf_LOOKBEHIND_SEEN (1<<(RXf_BASE_SHIFT+6))
+#define RXf_NO_INPLACE_SUBST (1<<(RXf_BASE_SHIFT+6))
#define RXf_EVAL_SEEN (1<<(RXf_BASE_SHIFT+7))
#define RXf_CANY_SEEN (1<<(RXf_BASE_SHIFT+8))
@@ -409,8 +409,6 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RXf_INTUIT_TAIL (1<<(RXf_BASE_SHIFT+14))
#define RXf_USE_INTUIT (RXf_USE_INTUIT_NOML|RXf_USE_INTUIT_ML)
-#define RXf_MODIFIES_VARS (1<<(RXf_BASE_SHIFT+15))
-
/* Copy and tainted info */
#define RXf_COPY_DONE (1<<(RXf_BASE_SHIFT+16))
@@ -422,6 +420,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
/* Flags indicating special patterns */
#define RXf_START_ONLY (1<<(RXf_BASE_SHIFT+19)) /* Pattern is /^/ */
+#define RXf_SKIPWHITE (1<<(RXf_BASE_SHIFT+20)) /* Pattern is for a split " " */
#define RXf_WHITE (1<<(RXf_BASE_SHIFT+21)) /* Pattern is /\s+/ */
#define RXf_NULL (1U<<(RXf_BASE_SHIFT+22)) /* Pattern is // */
#if RXf_BASE_SHIFT+22 > 31
@@ -468,6 +467,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
: RX_MATCH_COPIED_off(prog))
#define RXp_EXTFLAGS(rx) ((rx)->extflags)
+#define RXp_COMPFLAGS(rx) ((rx)->compflags)
/* For source compatibility. We used to store these explicitly. */
#define RX_PRECOMP(prog) (RX_WRAPPED(prog) + ReANY(prog)->pre_prefix)
@@ -482,6 +482,7 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RX_CHECK_SUBSTR(prog) (ReANY(prog)->check_substr)
#define RX_REFCNT(prog) SvREFCNT(prog)
#define RX_EXTFLAGS(prog) RXp_EXTFLAGS(ReANY(prog))
+#define RX_COMPFLAGS(prog) RXp_COMPFLAGS(ReANY(prog))
#define RX_ENGINE(prog) (ReANY(prog)->engine)
#define RX_SUBBEG(prog) (ReANY(prog)->subbeg)
#define RX_SUBOFFSET(prog) (ReANY(prog)->suboffset)
diff --git a/regnodes.h b/regnodes.h
index fd2102e9ec..0caf86dd55 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -650,7 +650,7 @@ EXTCONST char * const PL_reg_name[] = {
EXTCONST char * PL_reg_extflags_name[];
#else
EXTCONST char * const PL_reg_extflags_name[] = {
- /* Bits in extflags defined: 11011111111111111111111011111111 */
+ /* Bits in extflags defined: 11111110111111111111111111111111 */
"MULTILINE", /* 0x00000001 */
"SINGLELINE", /* 0x00000002 */
"FOLD", /* 0x00000004 */
@@ -659,14 +659,14 @@ EXTCONST char * const PL_reg_extflags_name[] = {
"CHARSET0", /* 0x00000020 : "CHARSET" - 0x000000e0 */
"CHARSET1", /* 0x00000040 : "CHARSET" - 0x000000e0 */
"CHARSET2", /* 0x00000080 : "CHARSET" - 0x000000e0 */
- "UNUSED_BIT_8", /* 0x00000100 */
+ "SPLIT", /* 0x00000100 */
"ANCH_BOL", /* 0x00000200 */
"ANCH_MBOL", /* 0x00000400 */
"ANCH_SBOL", /* 0x00000800 */
"ANCH_GPOS", /* 0x00001000 */
"GPOS_SEEN", /* 0x00002000 */
"GPOS_FLOAT", /* 0x00004000 */
- "LOOKBEHIND_SEEN", /* 0x00008000 */
+ "NO_INPLACE_SUBST", /* 0x00008000 */
"EVAL_SEEN", /* 0x00010000 */
"CANY_SEEN", /* 0x00020000 */
"NOSCAN", /* 0x00040000 */
@@ -675,12 +675,12 @@ EXTCONST char * const PL_reg_extflags_name[] = {
"USE_INTUIT_NOML", /* 0x00200000 */
"USE_INTUIT_ML", /* 0x00400000 */
"INTUIT_TAIL", /* 0x00800000 */
- "MODIFIES_VARS", /* 0x01000000 */
+ "UNUSED_BIT_24", /* 0x01000000 */
"COPY_DONE", /* 0x02000000 */
"TAINTED_SEEN", /* 0x04000000 */
"TAINTED", /* 0x08000000 */
"START_ONLY", /* 0x10000000 */
- "UNUSED_BIT_29", /* 0x20000000 */
+ "SKIPWHITE", /* 0x20000000 */
"WHITE", /* 0x40000000 */
"NULL", /* 0x80000000 */
};
diff --git a/t/op/split.t b/t/op/split.t
index c198737d3e..5e011595e2 100644
--- a/t/op/split.t
+++ b/t/op/split.t
@@ -6,7 +6,7 @@ BEGIN {
require './test.pl';
}
-plan tests => 115;
+plan tests => 118;
$FS = ':';
@@ -418,16 +418,22 @@ is($cnt, scalar(@ary));
is "@PATH", "Font GlyphNames", "hybrid scalar-and-array context";
}
-# [perl #94490] constant folding should not invoke special split " "
-# behaviour.
-@_=split(0||" ","foo bar");
-is @_, 3, 'split(0||" ") is not treated like split(" ")'; #'
-
{
my @results;
- my $expr;
- $expr = ' a b c ';
+ my $expr= "foo bar";
+ my $cond;
+
+ @results= split(0||" ", $expr);
+ is @results, 2, 'split(0||" ") is treated like split(" ")'; #'
+
+ $cond= 0;
+ @results= split $cond ? " " : qr/ /, $expr;
+ is @results, 3, 'split($cond ? " " : qr/ /, $expr) works as expected (like qr/ /)';
+ $cond= 1;
+ @results= split $cond ? " " : qr/ /, $expr;
+ is @results, 2, 'split($cond ? " " : qr/ /, $expr) works as expected (like " ")';
+ $expr = ' a b c ';
@results = split /\s/, $expr;
is @results, 4,
"split on regex of single space metacharacter: captured 4 elements";
@@ -452,10 +458,19 @@ is @_, 3, 'split(0||" ") is not treated like split(" ")'; #'
"split on string of single whitespace: captured 3 elements";
is $results[0], 'a',
"split on string of single whitespace: first element is non-empty; multiple contiguous space characters";
+
+ my @seq;
+ for my $cond (0,1,0,1,0) {
+ $expr = " foo ";
+ @results = split $cond ? qr/ / : " ", $expr;
+ push @seq, scalar(@results) . ":" . $results[-1];
+ }
+ is join(" ", @seq), "1:foo 3:foo 1:foo 3:foo 1:foo",
+ qq{split(\$cond ? qr/ / : " ", "$exp") behaves as expected over repeated similar patterns};
}
-TODO: {
- local $::TODO = 'RT #116086: split "\x20" does not work as documented';
+{
+ # 'RT #116086: split "\x20" does not work as documented';
my @results;
my $expr;
$expr = ' a b c ';
diff --git a/t/re/recompile.t b/t/re/recompile.t
index ad00df8320..63a70684be 100644
--- a/t/re/recompile.t
+++ b/t/re/recompile.t
@@ -22,7 +22,7 @@ BEGIN {
}
-plan tests => 36;
+plan tests => 38;
my $results = runperl(
switches => [ '-Dr' ],
@@ -187,3 +187,9 @@ my $y = '(?{1})';
BEGIN { $^H |= 0x00200000 } # lightweight "use re 'eval'"
"a" =~ qr/a$x$_/ for $y, $y, $y;
CODE
+
+comp_n(6, <<'CODE', 'embedded code qr');
+my $x = qr/a/i;
+my $y = qr/a/;
+"a" =~ qr/a$_/ for $x, $y, $x, $y;
+CODE