summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2011-08-25 11:41:49 +0100
committerDavid Mitchell <davem@iabyn.com>2011-12-19 15:06:01 +0000
commitbdd643fb5cdb1f504ab1a891549d8ce41bb07349 (patch)
tree7b1f0b6b2917bed930bae19e76ea92dd469f85aa
parent1167762429d91a50dd5b14c8130f08327f581a39 (diff)
downloadperl-bdd643fb5cdb1f504ab1a891549d8ce41bb07349.tar.gz
Mostly complete fix for literal /(?{..})/ blocks
Change the way that code blocks in patterns are parsed and executed, especially as regards lexical and scoping behaviour. (Note that this fix only applies to literal code blocks appearing within patterns: run-time patterns, and literals within qr//, are still done the old broken way for now). This change means that for literal /(?{..})/ and /(??{..})/: * the code block is now fully parsed in the same pass as the surrounding code, which means that the compiler no longer just does a simplistic count of balancing {} to find the limits of the code block; i.e. stuff like /(?{ $x = "{" })/ now works (in the same way that subscripts in double quoted strings always have: "$a{'{'}" ) * Error and warning messages will now appear to emanate from the main body rather than an re_eval; e.g. the output from #!/usr/bin/perl /(?{ warn "boo" })/ has changed from boo at (re_eval 1) line 1. to boo at /tmp/p line 2. * scope and closures now behave as you might expect; for example for my $x (qw(a b c)) { "" =~ /(?{ print $x })/ } now prints "abc" rather than "" * with recursion, it now finds the lexical within the appropriate depth of pad: this code now prints "012" rather than "000": sub recurse { my ($n) = @_; return if $n > 2; "" =~ /^(?{print $n})/; recurse($n+1); } recurse(0); * an earlier fix that stopped 'my' declarations within code blocks causing crashes, required the accumulating of two SAVECOMPPADs on the stack for each iteration of the code block; this is no longer needed; * UNITCHECK blocks within literal code blocks are now run as part of the main body of code (run-time code blocks still trigger an immediate call to the UNITCHECK block though) This is all achieved by building upon the efforts of the commits which led up to this; those altered the parser to parse literal code blocks directly, but up until now those code blocks were discarded by Perl_pmruntime and the block re-compiled using the original re_eval mechanism. As of this commit, for the non-qr and non-runtime variants, those code blocks are no longer thrown away. Instead: * the LISTOP generated by the parser, which contains all the code blocks plus OP_CONSTs that collectively make up the literal pattern, is now stored in a new field in PMOPs, called op_code_list. For example in /A(?{BLOCK})C/, the listop stored in op_code_list looks like LIST PUSHMARK CONST['A'] NULL/special (aka a DO block) BLOCK CONST['(?{BLOCK})'] CONST['B'] * each of the code blocks has its last op set to null and is individually run through the peephole optimiser, so each one becomes a little self-contained block of code, rather than a list of blocks that run into each other; * then in re_op_compile(), we concatenate the list of CONSTs to produce a string to be compiled, but at the same time we note any DO blocks and note the start and end positions of the corresponding CONST['(?{BLOCK})']; * (if the current regex engine isn't the built-in perl one, then we just throw away the code blocks and pass the concatenated string to the engine) * then during regex compilation, whenever we encounter a '(?{', we see if it matches the index of one of the pre-compiled blocks, and if so, we store a pointer to that block in an 'l' data slot, and use the end index to skip over the text of the code body. Conversely, if the index doesn't match, then we know that it's a run-time pattern and (for now), compile it in the old way. * During execution, when an EVAL op is encountered, if data->what is 'l', then we just use the pad that was in effect when the pattern was called; i.e. we use the current pad slot of the currently executing CV that the pattern is embedded within.
-rw-r--r--dump.c4
-rw-r--r--op.c101
-rw-r--r--op.h1
-rw-r--r--pod/perlmod.pod2
-rw-r--r--regcomp.c227
-rw-r--r--regcomp.h1
-rw-r--r--regexec.c26
-rw-r--r--t/lib/strict/refs2
-rw-r--r--t/lib/strict/subs7
-rw-r--r--t/op/blocks.t27
-rw-r--r--t/re/pat_re_eval.t14
-rw-r--r--t/re/re_tests3
-rw-r--r--t/re/reg_eval_scope.t4
-rw-r--r--t/run/fresh_perl.t3
14 files changed, 293 insertions, 129 deletions
diff --git a/dump.c b/dump.c
index 2c635deec9..70ce28ba79 100644
--- a/dump.c
+++ b/dump.c
@@ -613,6 +613,10 @@ Perl_do_pmop_dump(pTHX_ I32 level, PerlIO *file, const PMOP *pm)
Perl_dump_indent(aTHX_ level, file, "PMf_REPL = ");
op_dump(pm->op_pmreplrootu.op_pmreplroot);
}
+ if (pm->op_code_list) {
+ Perl_dump_indent(aTHX_ level, file, "CODE_LIST =\n");
+ do_op_dump(level, file, pm->op_code_list);
+ }
if (pm->op_pmflags || (PM_GETRE(pm) && RX_CHECK_SUBSTR(PM_GETRE(pm)))) {
SV * const tmpsv = pm_description(pm);
Perl_dump_indent(aTHX_ level, file, "PMFLAGS = (%s)\n", SvCUR(tmpsv) ? SvPVX_const(tmpsv) + 1 : "");
diff --git a/op.c b/op.c
index e6bbe370b6..4a5cd531f0 100644
--- a/op.c
+++ b/op.c
@@ -677,6 +677,8 @@ Perl_op_clear(pTHX_ OP *o)
case OP_MATCH:
case OP_QR:
clear_pmop:
+ op_free(cPMOPo->op_code_list);
+ cPMOPo->op_code_list = NULL;
forget_pmop(cPMOPo, 1);
cPMOPo->op_pmreplrootu.op_pmreplroot = NULL;
/* we use the same protection as the "SAFE" version of the PM_ macros
@@ -4244,45 +4246,83 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
is_compiletime = 0;
}
}
- else { assert(expr->op_type != OP_PUSHMARK); if (expr->op_type != OP_CONST && expr->op_type != OP_PUSHMARK)
+ else if (expr->op_type != OP_CONST)
is_compiletime = 0;
- }
/* are we using an external (non-perl) re engine? */
eng = current_re_engine();
ext_eng = (eng && eng != &PL_core_reg_engine);
- /* concatenate adjacent CONSTs, and for non-perl engines, strip out
- * any DO blocks */
+ /* for perl engine:
+ * concatenate adjacent CONSTs for non-code case
+ * pre-process DO blocks;
+ * for non-perl engines:
+ * concatenate adjacent CONSTs;
+ * strip out any DO blocks
+ */
- if (expr->op_type == OP_LIST
- && (!is_compiletime || /* XXX TMP until we handle runtime (?{}) */
- !has_code || ext_eng))
- {
- OP *o, *kid;
- o = cLISTOPx(expr)->op_first;
- while (o->op_sibling) {
- kid = o->op_sibling;
+ if (expr->op_type == OP_LIST) {
+ OP *kid, *okid = NULL;
+ kid = cLISTOPx(expr)->op_first;
+ while (kid) {
if (kid->op_type == OP_NULL && (kid->op_flags & OPf_SPECIAL)) {
/* do {...} */
- o->op_sibling = kid->op_sibling;
- kid->op_sibling = NULL;
- op_free(kid);
+ if (ext_eng || !is_compiletime/*XXX tmp*/
+ || o->op_type == OP_QR/*XXX tmp*/) {
+ assert(okid);
+ okid->op_sibling = kid->op_sibling;
+ kid->op_sibling = NULL;
+ op_free(kid);
+ kid = okid;
+ }
+ else {
+ /* treat each DO block as a separate little sub */
+ scalar(kid);
+ LINKLIST(kid);
+ if (kLISTOP->op_first->op_type == OP_LEAVE) {
+ LISTOP *leave = cLISTOPx(kLISTOP->op_first);
+ /* skip ENTER */
+ assert(leave->op_first->op_type == OP_ENTER);
+ assert(leave->op_first->op_sibling);
+ kid->op_next = leave->op_first->op_sibling;
+ /* skip LEAVE */
+ assert(leave->op_flags & OPf_KIDS);
+ assert(leave->op_last->op_next = (OP*)leave);
+ leave->op_next = NULL; /* stop on last op */
+ op_null((OP*)leave);
+ }
+ else {
+ /* skip SCOPE */
+ OP *scope = kLISTOP->op_first;
+ assert(scope->op_type == OP_SCOPE);
+ assert(scope->op_flags & OPf_KIDS);
+ scope->op_next = NULL; /* stop on last op */
+ op_null(scope);
+ }
+ CALL_PEEP(kid);
+ finalize_optree(kid);
+ }
}
- else if (o->op_type == OP_CONST && kid->op_type == OP_CONST){
- SV* sv = cSVOPo->op_sv;
+ else if ( (ext_eng || !has_code || !is_compiletime/*XXX tmp*/)
+ && kid->op_type == OP_CONST
+ && kid->op_sibling
+ && kid->op_sibling->op_type == OP_CONST)
+ {
+ OP *o = kid->op_sibling;
+ SV* sv = cSVOPx_sv(kid);
SvREADONLY_off(sv);
- sv_catsv(sv, cSVOPx(kid)->op_sv);
+ sv_catsv(sv, cSVOPo_sv);
SvREADONLY_on(sv);
- o->op_sibling = kid->op_sibling;
- kid->op_sibling = NULL;
- op_free(kid);
+ kid->op_sibling = o->op_sibling;
+ o->op_sibling = NULL;
+ op_free(o);
+ kid = okid;
}
- else
- o = o->op_sibling;
+ okid = kid;
+ kid = kid->op_sibling;
}
- cLISTOPx(expr)->op_last = o;
+ cLISTOPx(expr)->op_last = okid;
}
PL_hints |= HINT_BLOCK_SCOPE;
@@ -4320,15 +4360,16 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg)
}
PM_SETRE(pm, CALLREGCOMP(pat, pm_flags));
- }
- else
- PM_SETRE(pm, re_op_compile(NULL, expr, pm_flags));
-
#ifdef PERL_MAD
- op_getmad(expr,(OP*)pm,'e');
+ op_getmad(expr,(OP*)pm,'e');
#else
- op_free(expr);
+ op_free(expr);
#endif
+ }
+ else {
+ pm->op_code_list = expr;
+ PM_SETRE(pm, re_op_compile(NULL, expr, pm_flags));
+ }
}
else {
bool reglist;
diff --git a/op.h b/op.h
index ffa9a3f163..8595c38cb7 100644
--- a/op.h
+++ b/op.h
@@ -372,6 +372,7 @@ struct pmop {
HV * op_pmstash;
#endif
} op_pmstashstartu;
+ OP * op_code_list; /* list of (?{}) code blocks */
};
#ifdef USE_ITHREADS
diff --git a/pod/perlmod.pod b/pod/perlmod.pod
index 33f098d022..9d02c3f70a 100644
--- a/pod/perlmod.pod
+++ b/pod/perlmod.pod
@@ -307,7 +307,7 @@ the main program.
C<UNITCHECK> blocks are run just after the unit which defined them has
been compiled. The main program file and each module it loads are
-compilation units, as are string C<eval>s, code compiled using the
+compilation units, as are string C<eval>s, run-time code compiled using the
C<(?{ })> construct in a regex, calls to C<do FILE>, C<require FILE>,
and code after the C<-e> switch on the command line.
diff --git a/regcomp.c b/regcomp.c
index 277b19dba2..255315522c 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -144,6 +144,11 @@ typedef struct RExC_state_t {
I32 in_lookbehind;
I32 contains_locale;
I32 override_recoding;
+ int max_code_index; /* max index into code_indices */
+ int code_index; /* index into code_indices */
+ STRLEN *code_indices; /* begin and ends of literal (?{})
+ within pattern */
+ OP* next_code_or_const; /* iterating the list of DO/OP_CONST */
#if ADD_TO_REGEXEC
char *starttry; /* -Dr: where regtry was called. */
#define RExC_starttry (pRExC_state->starttry)
@@ -4540,6 +4545,35 @@ Perl_re_compile(pTHX_ SV * const pattern, U32 orig_pm_flags)
return Perl_re_op_compile(aTHX_ pattern, NULL, orig_pm_flags);
}
+/* given a list of CONSTs and DO blocks in expr, append all the CONSTs to
+ * pat, and record the start and end of each code block in code_indices
+ * (each DO{} op is followed by an OP_CONST containing the corresponding
+ * literal '(?{...}) text)
+ */
+
+static void
+S_get_pat_and_code_indices(pTHX_ RExC_state_t *pRExC_state, OP* expr, SV* pat) {
+ int ncode = 0;
+ bool is_code = 0;
+ OP *o;
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+ if (o->op_type == OP_CONST) {
+ sv_catsv(pat, cSVOPo_sv);
+ if (is_code) {
+ pRExC_state->code_indices[ncode++] = SvCUR(pat); /* end pos */
+ is_code = 0;
+ }
+ }
+ else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+ assert(ncode < pRExC_state->max_code_index);
+ pRExC_state->code_indices[ncode++] = SvCUR(pat); /*start pos */
+ is_code = 1;
+ }
+ }
+ pRExC_state->code_index = 0;
+}
+
+
/*
* Perl_op_re_compile - the perl internal RE engine's function to compile a
* regular expression into internal code.
@@ -4596,33 +4630,37 @@ Perl_re_op_compile(pTHX_ SV * const pattern, OP *expr, U32 orig_pm_flags)
DEBUG_r(if (!PL_colorset) reginitcolors());
+ pRExC_state->code_indices = NULL;
+ pRExC_state->max_code_index = 0;
if (expr) {
- /* XXX tmp get rid of DO blocks, concat CONSTs */
- OP *o, *kid;
- o = cLISTOPx(expr)->op_first;
- while (o->op_sibling) {
- kid = o->op_sibling;
- if (kid->op_type == OP_NULL && (kid->op_flags & OPf_SPECIAL)) {
- /* do {...} */
- o->op_sibling = kid->op_sibling;
- kid->op_sibling = NULL;
- op_free(kid);
+ if (expr->op_type == OP_LIST) {
+ OP *o;
+ bool is_utf8 = 0;
+ int ncode = 0;
+
+ /* are we UTF8, and how many code blocks are there? */
+ for (o = cLISTOPx(expr)->op_first; o; o = o->op_sibling) {
+ if (o->op_type == OP_CONST && SvUTF8(cSVOPo_sv))
+ is_utf8 = 1;
+ else if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL))
+ /* count of DO blocks */
+ ncode++;
}
- else if (o->op_type == OP_CONST && kid->op_type == OP_CONST){
- SV* sv = cSVOPo->op_sv;
- SvREADONLY_off(sv);
- sv_catsv(sv, cSVOPx(kid)->op_sv);
- SvREADONLY_on(sv);
- o->op_sibling = kid->op_sibling;
- kid->op_sibling = NULL;
- op_free(kid);
+ pRExC_state->max_code_index = ncode*2;
+ if (ncode) {
+ Newx(pRExC_state->code_indices, ncode*2, STRLEN);
+ SAVEFREEPV(pRExC_state->code_indices);
}
- else
- o = o->op_sibling;
+ pat = newSVpvn("", 0);
+ SAVEFREESV(pat);
+ if (is_utf8)
+ SvUTF8_on(pat);
+ S_get_pat_and_code_indices(aTHX_ pRExC_state, expr, pat);
+ }
+ else {
+ assert(expr->op_type == OP_CONST);
+ pat = cSVOPx_sv(expr);
}
- cLISTOPx(expr)->op_last = o;
- pat = ((SVOP*)(expr->op_type == OP_LIST
- ? cLISTOPx(expr)->op_first->op_sibling : expr))->op_sv;
}
else
pat = pattern;
@@ -4676,10 +4714,20 @@ Perl_re_op_compile(pTHX_ SV * const pattern, OP *expr, U32 orig_pm_flags)
-- dmq */
DEBUG_PARSE_r(PerlIO_printf(Perl_debug_log,
"UTF8 mismatch! Converting to utf8 for resizing and compile\n"));
- exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pat, plen), &len);
- xend = exp + len;
- RExC_orig_utf8 = RExC_utf8 = 1;
- SAVEFREEPV(exp);
+
+ if (expr && expr->op_type == OP_LIST) {
+ sv_setpvn(pat, "", 0);
+ SvUTF8_on(pat);
+ S_get_pat_and_code_indices(aTHX_ pRExC_state, expr, pat);
+ exp = SvPV(pat, plen);
+ xend = exp + plen;
+ }
+ else {
+ exp = (char*)Perl_bytes_to_utf8(aTHX_ (U8*)SvPV(pat, plen), &len);
+ xend = exp + len;
+ SAVEFREEPV(exp);
+ }
+ RExC_orig_utf8 = RExC_utf8 = 1;
}
#ifdef TRIE_STUDY_OPT
@@ -4889,6 +4937,11 @@ Perl_re_op_compile(pTHX_ SV * const pattern, OP *expr, U32 orig_pm_flags)
RExC_emit_start = ri->program;
RExC_emit = ri->program;
RExC_emit_bound = ri->program + RExC_size + 1;
+ pRExC_state->code_index = 0;
+ if (expr && expr->op_type == OP_LIST) {
+ assert(cLISTOPx(expr)->op_first->op_type == OP_PUSHMARK);
+ pRExC_state->next_code_or_const = cLISTOPx(expr)->op_first;
+ }
/* Store the count of eval-groups for security checks: */
RExC_rx->seen_evals = RExC_seen_evals;
@@ -7256,55 +7309,83 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
RExC_seen_zerolen++;
RExC_seen |= REG_SEEN_EVAL;
- while (count && (c = *RExC_parse)) {
- if (c == '\\') {
- if (RExC_parse[1])
- RExC_parse++;
+
+ if (pRExC_state->max_code_index
+ && pRExC_state->code_indices[pRExC_state->code_index] ==
+ (STRLEN)((RExC_parse -3 - (is_logical ? 1 : 0))
+ - RExC_start)
+ ) {
+ /* this is a pre-compiled literal (?{}) */
+ assert(pRExC_state->code_index
+ < pRExC_state->max_code_index);
+ RExC_parse = RExC_start - 1
+ + pRExC_state->code_indices[++pRExC_state->code_index];
+ pRExC_state->code_index++;
+ if (SIZE_ONLY)
+ RExC_seen_evals++;
+ else {
+ OP *o = pRExC_state->next_code_or_const;
+ while(! (o->op_type == OP_NULL
+ && (o->op_flags & OPf_SPECIAL)))
+ {
+ o = o->op_sibling;
+ }
+ n = add_data(pRExC_state, 1, "l");
+ RExC_rxi->data->data[n] = (void*)o->op_next;
+ pRExC_state->next_code_or_const = o->op_sibling;
}
- else if (c == '{')
- count++;
- else if (c == '}')
- count--;
- RExC_parse++;
}
- if (*RExC_parse != ')') {
- RExC_parse = s;
- vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
- }
- if (!SIZE_ONLY) {
- PAD *pad;
- OP_4tree *sop, *rop;
- SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
-
- ENTER;
- Perl_save_re_context(aTHX);
- rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
- sop->op_private |= OPpREFCOUNTED;
- /* re_dup will OpREFCNT_inc */
- OpREFCNT_set(sop, 1);
- LEAVE;
-
- n = add_data(pRExC_state, 3, "nop");
- RExC_rxi->data->data[n] = (void*)rop;
- RExC_rxi->data->data[n+1] = (void*)sop;
- RExC_rxi->data->data[n+2] = (void*)pad;
- SvREFCNT_dec(sv);
- }
- else { /* First pass */
- if (PL_reginterp_cnt < ++RExC_seen_evals
- && IN_PERL_RUNTIME)
- /* No compiled RE interpolated, has runtime
- components ===> unsafe. */
- FAIL("Eval-group not allowed at runtime, use re 'eval'");
- if (PL_tainting && PL_tainted)
- FAIL("Eval-group in insecure regular expression");
-#if PERL_VERSION > 8
- if (IN_PERL_COMPILETIME)
- PL_cv_has_eval = 1;
-#endif
+ else {
+ while (count && (c = *RExC_parse)) {
+ if (c == '\\') {
+ if (RExC_parse[1])
+ RExC_parse++;
+ }
+ else if (c == '{')
+ count++;
+ else if (c == '}')
+ count--;
+ RExC_parse++;
+ }
+ if (*RExC_parse != ')') {
+ RExC_parse = s;
+ vFAIL("Sequence (?{...}) not terminated or not {}-balanced");
+ }
+ if (!SIZE_ONLY) {
+ PAD *pad;
+ OP_4tree *sop, *rop;
+ SV * const sv = newSVpvn(s, RExC_parse - 1 - s);
+
+ ENTER;
+ Perl_save_re_context(aTHX);
+ rop = Perl_sv_compile_2op_is_broken(aTHX_ sv, &sop, "re", &pad);
+ sop->op_private |= OPpREFCOUNTED;
+ /* re_dup will OpREFCNT_inc */
+ OpREFCNT_set(sop, 1);
+ LEAVE;
+
+ n = add_data(pRExC_state, 3, "nop");
+ RExC_rxi->data->data[n] = (void*)rop;
+ RExC_rxi->data->data[n+1] = (void*)sop;
+ RExC_rxi->data->data[n+2] = (void*)pad;
+ SvREFCNT_dec(sv);
+ }
+ else { /* First pass */
+ if (PL_reginterp_cnt < ++RExC_seen_evals
+ && IN_PERL_RUNTIME)
+ /* No compiled RE interpolated, has runtime
+ components ===> unsafe. */
+ FAIL("Eval-group not allowed at runtime, use re 'eval'");
+ if (PL_tainting && PL_tainted)
+ FAIL("Eval-group in insecure regular expression");
+ #if PERL_VERSION > 8
+ if (IN_PERL_COMPILETIME)
+ PL_cv_has_eval = 1;
+ #endif
+ }
}
-
nextchar(pRExC_state);
+
if (is_logical) {
ret = reg_node(pRExC_state, LOGICAL);
if (!SIZE_ONLY)
@@ -11970,6 +12051,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
SvREFCNT_dec(MUTABLE_SV(new_comppad));
new_comppad = NULL;
break;
+ case 'l':
case 'n':
break;
case 'T':
@@ -12210,6 +12292,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
((reg_trie_data*)ri->data->data[i])->refcount++;
OP_REFCNT_UNLOCK;
/* Fall through */
+ case 'l':
case 'n':
d->data[i] = ri->data->data[i];
break;
diff --git a/regcomp.h b/regcomp.h
index 81c8a5ddd7..f1737f7347 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -532,6 +532,7 @@ END_EXTERN_C
* The character describes the function of the corresponding .data item:
* a - AV for paren_name_list under DEBUGGING
* f - start-class data for regstclass optimization
+ * l - start op for literal (?{EVAL}) item
* n - Root of op tree for (?{EVAL}) item
* o - Start op for (?{EVAL}) item
* p - Pad for (?{EVAL}) item
diff --git a/regexec.c b/regexec.c
index ced20bde35..e24a3b97c2 100644
--- a/regexec.c
+++ b/regexec.c
@@ -3111,6 +3111,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
false: plain (?=foo)
true: used as a condition: (?(?=foo))
*/
+ PAD* const initial_pad = PL_comppad;
#ifdef DEBUGGING
GET_RE_DEBUG_FLAGS_DECL;
#endif
@@ -4234,7 +4235,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
SV ** const before = SP;
OP_4tree * const oop = PL_op;
COP * const ocurcop = PL_curcop;
- PAD *old_comppad;
+ PAD *old_comppad, *new_comppad;
char *saved_regeol = PL_regeol;
struct re_save_state saved_state;
@@ -4255,7 +4256,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
n = ARG(scan);
- PL_op = (OP_4tree*)rexi->data->data[n];
+ if (rexi->data->what[n] == 'l') { /* literal code */
+ new_comppad = initial_pad; /* the pad of the current sub */
+ PL_op = (OP_4tree*)rexi->data->data[n];
+ }
+ else {
+ PL_op = (OP_4tree*)rexi->data->data[n];
+ new_comppad = (PAD*)rexi->data->data[n + 2];
+ }
DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
" re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
/* wrap the call in two SAVECOMPPADs. This ensures that
@@ -4263,8 +4271,12 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
* accumulated SAVEt_CLEARSV's will be processed with
* interspersed SAVEt_COMPPAD's to ensure that lexicals
* are cleared in the right pad */
- SAVECOMPPAD();
- PAD_SAVE_LOCAL(old_comppad, (PAD*)rexi->data->data[n + 2]);
+ if (PL_comppad == new_comppad)
+ old_comppad = new_comppad;
+ else {
+ SAVECOMPPAD();
+ PAD_SAVE_LOCAL(old_comppad, new_comppad);
+ }
PL_regoffs[0].end = PL_reg_magic->mg_len = locinput - PL_bostr;
if (sv_yes_mark) {
@@ -4284,8 +4296,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
Copy(&saved_state, &PL_reg_state, 1, struct re_save_state);
PL_op = oop;
- SAVECOMPPAD();
- PAD_RESTORE_LOCAL(old_comppad);
+ if (old_comppad != PL_comppad) {
+ SAVECOMPPAD();
+ PAD_RESTORE_LOCAL(old_comppad);
+ }
PL_curcop = ocurcop;
PL_regeol = saved_regeol;
if (!logical) {
diff --git a/t/lib/strict/refs b/t/lib/strict/refs
index 09b962f71b..06ee6dd2df 100644
--- a/t/lib/strict/refs
+++ b/t/lib/strict/refs
@@ -308,7 +308,7 @@ Can't use string ("Fred") as a SCALAR ref while "strict refs" in use at - line 8
use strict 'refs';
/(?{${"foo"}++})/;
EXPECT
-Can't use string ("foo") as a SCALAR ref while "strict refs" in use at (re_eval 1) line 1.
+Can't use string ("foo") as a SCALAR ref while "strict refs" in use at - line 3.
########
# [perl #37886] strict 'refs' doesn't apply inside defined
use strict 'refs';
diff --git a/t/lib/strict/subs b/t/lib/strict/subs
index 57327cca0b..e3e401408f 100644
--- a/t/lib/strict/subs
+++ b/t/lib/strict/subs
@@ -451,3 +451,10 @@ sub foo {
EXPECT
Bareword "FOO" not allowed while "strict subs" in use at - line 5.
Execution of - aborted due to compilation errors.
+########
+# make sure checks are done within (?{})
+use strict 'subs';
+/(?{FOO})/
+EXPECT
+Bareword "FOO" not allowed while "strict subs" in use at - line 3.
+Execution of - aborted due to compilation errors.
diff --git a/t/op/blocks.t b/t/op/blocks.t
index eefaab68cc..0fdc4f69b5 100644
--- a/t/op/blocks.t
+++ b/t/op/blocks.t
@@ -13,13 +13,13 @@ b1
b2
b3
b4
-b6
-u5
+b6-c
b7
u6
+u5-c
u1
c3
-c2
+c2-c
c1
i1
i2
@@ -27,15 +27,13 @@ b5
u2
u3
u4
+b6-r
+u5-r
e2
e1
);
my $expect = ":" . join(":", @expect);
-# XXX tmp while re-evals are being doubly compiled:
-$expect =
- ':b1:b2:b3:b4:b6:b6:u5:b7:u6:u5:u1:c3:c2:c2:c1:i1:i2:b5:u2:u3:u4:e2:e1';
-
fresh_perl_is(<<'SCRIPT', $expect,{switches => [''], stdin => '', stderr => 1 },'Order of execution of special blocks');
BEGIN {print ":b1"}
END {print ":e1"}
@@ -49,9 +47,18 @@ UNITCHECK {print ":u1"}
eval 'BEGIN {print ":b5"}';
eval 'UNITCHECK {print ":u2"}';
eval 'UNITCHECK {print ":u3"; UNITCHECK {print ":u4"}}';
-"a" =~ /(?{UNITCHECK {print ":u5"};
- CHECK {print ":c2"};
- BEGIN {print ":b6"}})/x;
+"a" =~ /(?{UNITCHECK {print ":u5-c"};
+ CHECK {print ":c2-c"};
+ BEGIN {print ":b6-c"}})/x;
+{
+ use re 'eval';
+ my $runtime = q{
+ (?{UNITCHECK {print ":u5-r"};
+ CHECK {print ":c2-r"};
+ BEGIN {print ":b6-r"}})/
+ };
+ "a" =~ /$runtime/x;
+}
eval {BEGIN {print ":b7"}};
eval {UNITCHECK {print ":u6"}};
eval {INIT {print ":i2"}};
diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t
index 262e6f367b..57f2fa21b8 100644
--- a/t/re/pat_re_eval.t
+++ b/t/re/pat_re_eval.t
@@ -22,7 +22,7 @@ BEGIN {
}
-plan tests => 214; # Update this when adding/deleting tests.
+plan tests => 217; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -378,7 +378,7 @@ sub run_tests {
# the most basic: literal code should be in same scope
# as the parent
- tok(1, "A$x" =~ /^A(??{$x})$/, "[$x] literal code");
+ ok("A$x" =~ /^A(??{$x})$/, "[$x] literal code");
# the "don't recompile if pattern unchanged" mechanism
# shouldn't apply to code blocks - recompile every time
@@ -477,6 +477,16 @@ sub run_tests {
"[$x-$yy] literal qr + r6 +lit, outside");
}
}
+
+ # recursive subs should get lexical from the correct pad depth
+
+ sub recurse {
+ my ($n) = @_;
+ return if $n > 2;
+ ok("A$n" =~ /^A(??{$n})$/, "recurse($n)");
+ recurse($n+1);
+ }
+ recurse(0);
}
} # End of sub run_tests
diff --git a/t/re/re_tests b/t/re/re_tests
index 83db4710f1..b43136f0b2 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -537,7 +537,8 @@ a(?{})b cabd y $& ab
a(?{f()+ - c - Missing right curly or square bracket
a(?{{1}+ - c - Missing right curly or square bracket
a(?{}})b - c -
-a(?{"{"})b - c - Sequence (?{...}) not terminated or not {}-balanced
+# XXX tmp disable this test - works for // but not qr// yet
+#a(?{"{"})b ab y - -
a(?{"\{"})b cabd y $& ab
a(?{"{"}})b - c - Sequence (?{...}) not terminated with ')'
a(?{$::bl="\{"}).b caxbd y $::bl {
diff --git a/t/re/reg_eval_scope.t b/t/re/reg_eval_scope.t
index a23321f2d2..1369a5fa61 100644
--- a/t/re/reg_eval_scope.t
+++ b/t/re/reg_eval_scope.t
@@ -23,8 +23,6 @@ fresh_perl_is <<'CODE', '781745', {}, '(?{}) has its own lexical scope';
print $x,$a,$b;
CODE
-on;
-
fresh_perl_is <<'CODE',
for my $x("a".."c") {
$y = 1;
@@ -44,8 +42,6 @@ CODE
{},
'multiple (?{})s in loop with lexicals';
-off;
-
fresh_perl_is <<'CODE', '781745', {}, 'run-time re-eval has its own scope';
use re qw(eval);
my $x = 7; my $a = 4; my $b = 5;
diff --git a/t/run/fresh_perl.t b/t/run/fresh_perl.t
index e1ffc1b823..3e56a0944f 100644
--- a/t/run/fresh_perl.t
+++ b/t/run/fresh_perl.t
@@ -349,9 +349,8 @@ sub foo { local $_ = shift; @_ = split; @_ }
@x = foo(' x y z ');
print "you die joe!\n" unless "@x" eq 'x y z';
########
-/(?{"{"})/ # Check it outside of eval too
+"A" =~ /(?{"{"})/ # Check it outside of eval too
EXPECT
-Sequence (?{...}) not terminated or not {}-balanced in regex; marked by <-- HERE in m/(?{ <-- HERE "{"})/ at - line 1.
########
/(?{"{"}})/ # Check it outside of eval too
EXPECT