summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2011-11-12 20:51:27 +0000
committerDavid Mitchell <davem@iabyn.com>2011-12-19 15:06:04 +0000
commitda8e5eefb08d803d1779699680ec94dc430a4137 (patch)
treebd764213bc54806ecdc4ce754f0012a5e62895d5
parentf04fdf0b48a5778d12e39a4b8eb3845f26e351e7 (diff)
downloadperl-da8e5eefb08d803d1779699680ec94dc430a4137.tar.gz
Handle literal code blocks in runtime regexes
In the following types of regex: /$runtime(?{...})/ qr/$runtime(?{...})/ make it so that the code block is compiled at the same time that the surrounding code is compiled, then is incorporated, rather than re-compiled, when the regex source is assembled and compiled at runtime. This fixes a bunch of closure-related TODO tests. Note that this still doesn't yet handle the cases where $runtime contains: $runtime = qr/...(?{...})/; # block will be stringified and recompiled $runtime = '(?{...})'; # block compiled the old way, with matching nesting of {} required It also doesn't yet handle the case where the pattern getting compiled is upgraded to utf8 and so is restarted. Note that this is rather complex, because in something like $str =~ qr/$a(?{...})$b[1]/ there are four separate phases * perl compile time; we also compile the code block at the same time, but within a separate anon CV (with a separate pad) * at run time, we execute the code that generates the list of SVs (i.e. $a, $b[1] etc), but have to execute them within the context of the anon sub, since that's what they were compiled in; we then have to concat the arguments, while remembering which were literal code blocks; * then qr// clones the compiled regex, and clones the anon CV at the same time; * finally, the pattern is executed. Through all this we have to ensure that the code blocks and associated anon CV and pad get preserved and incorporated into the right places for eventual use. The changes in this commit build upon the work in the previous few commits, and work by: * at (perl) compile time, in pmruntime(), the anon CV (if any) associated with a qr//, as well as being referred to by the op_targ of the anoncode op, is also made the targ of the regcomp op; * at pattern assembly and compile time, * Perl_re_op_compile() takes the list of SVs gathered by pp_regcomp(), along with the op tree (from op_code_list) that was used to generate those SVs (as well as containing the individual DO blocks), and concatenates them to get a final pattern source string, while noting the start and end positions of any literal (?{..})'s, and which block they must correspond to. * after compilation, pp_regcomp() then uses op_targ to locate the anon CV and store a pointer to it in the regex. qr// instantiation and execution work unchanged.
-rw-r--r--op.c11
-rw-r--r--pp_ctl.c5
-rw-r--r--regcomp.c38
-rw-r--r--t/re/pat_re_eval.t12
4 files changed, 54 insertions, 12 deletions
diff --git a/op.c b/op.c
index 517e6aeeea..2ebdaad257 100644
--- a/op.c
+++ b/op.c
@@ -4402,6 +4402,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
else {
/* runtime pattern: build chain of regcomp etc ops */
bool reglist;
+ PADOFFSET cv_targ = 0;
reglist = isreg && expr->op_type == OP_LIST;
if (reglist)
@@ -4454,8 +4455,13 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
*/
SvREFCNT_inc_simple_void(PL_compcv);
- expr = list(force_list(newUNOP(OP_ENTERSUB, 0,
- scalar(newANONATTRSUB(floor, NULL, NULL, expr)))));
+ /* these lines are just an unrolled newANONATTRSUB */
+ expr = newSVOP(OP_ANONCODE, 0,
+ MUTABLE_SV(newATTRSUB(floor, 0, NULL, NULL, expr)));
+ cv_targ = expr->op_targ;
+ expr = newUNOP(OP_REFGEN, 0, expr);
+
+ expr = list(force_list(newUNOP(OP_ENTERSUB, 0, scalar(expr))));
}
NewOp(1101, rcop, 1, LOGOP);
@@ -4467,6 +4473,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
| (reglist ? OPf_STACKED : 0);
rcop->op_private = 0;
rcop->op_other = o;
+ rcop->op_targ = cv_targ;
/* /$x/ may cause an eval, since $x might be qr/(?{..})/ */
if (PL_hints & HINT_RE_EVAL) PL_cv_has_eval = 1;
diff --git a/pp_ctl.c b/pp_ctl.c
index e4bd457b84..4855f26700 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -114,7 +114,10 @@ PP(pp_regcomp)
PL_reginterp_cnt = (I32_MAX>>1); /* Mark as safe. */
new_re = re_op_compile(args, nargs, pm->op_code_list, eng, re,
- &is_bare_re, (pm->op_pmflags & RXf_PMf_COMPILETIME));
+ &is_bare_re,
+ (pm->op_pmflags & (RXf_PMf_COMPILETIME|PMf_HAS_CV)));
+ if (pm->op_pmflags & PMf_HAS_CV)
+ ((struct regexp *)SvANY(new_re))->qr_anoncv = PAD_SV(PL_op->op_targ);
if (is_bare_re) {
REGEXP *tmp;
diff --git a/regcomp.c b/regcomp.c
index af3597e040..d6cd2df3b5 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4708,18 +4708,50 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
if (pat_count > 1) {
- /* concat multiple args */
+ /* concat multiple args and find any code block indexes */
+
+ OP *o = NULL;
+ int n = 0;
+
+ if (pRExC_state->num_code_blocks) {
+ o = cLISTOPx(expr)->op_first;
+ assert(o->op_type == OP_PUSHMARK);
+ o = o->op_sibling;
+ }
- pRExC_state->num_code_blocks = 0; /* XXX tmp */
pat = newSVpvn("", 0);
SAVEFREESV(pat);
for (svp = patternp; svp < patternp + pat_count; svp++) {
SV *sv, *msv = *svp;
+ bool code = 0;
+ if (o) {
+ if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+ n++;
+ assert(n <= pRExC_state->num_code_blocks);
+ pRExC_state->code_blocks[n-1].start = SvCUR(pat);
+ pRExC_state->code_blocks[n-1].block = o;
+ code = 1;
+ o = o->op_sibling; /* skip CONST */
+ assert(o);
+ }
+ o = o->op_sibling;;
+ }
+
if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
(sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+ {
sv_setsv(pat, sv);
- else
+ /* overloading involved: all bets are off over literal
+ * code. Pretend we haven't seen it */
+ pRExC_state->num_code_blocks -= n;
+ n = 0;
+
+ }
+ else {
sv_catsv_nomg(pat, msv);
+ if (code)
+ pRExC_state->code_blocks[n-1].end = SvCUR(pat);
+ }
}
SvSETMAGIC(pat);
}
diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t
index 2a16027b3d..4eef9d166f 100644
--- a/t/re/pat_re_eval.t
+++ b/t/re/pat_re_eval.t
@@ -400,12 +400,12 @@ sub run_tests {
# literal qr code only created once, embedded with text
$cr2 //= qr/B(??{$x})$/;
- tok(0, "ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text");
+ ok("ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text");
# literal qr code only created once, embedded with text + lit code
$cr3 //= qr/C(??{$x})$/;
- tok($bc, "A$x-BCa" =~ /^A(??{$x})-B$cr3/,
+ ok("A$x-BCa" =~ /^A(??{$x})-B$cr3/,
"[$x] literal qr once embedded text + lit code");
# literal qr code only created once, embedded with text + run code
@@ -451,12 +451,12 @@ sub run_tests {
my $rr5 = qr/^A(??{"$x$y"})-$r5/;
push @rr5, $rr5;
- tok("$x$y" ne "ad", "A$x$y-C$x" =~ $rr5,
+ tok($bc, "A$x$y-C$x" =~ $rr5,
"[$x-$y] literal qr + r5");
my $rr6 = qr/^A(??{"$x$y"})-$r6/;
push @rr6, $rr6;
- tok("$x$y" ne "ad", "A$x$y-$x-C$x" =~ $rr6,
+ tok($bc, "A$x$y-$x-C$x" =~ $rr6,
"[$x-$y] literal qr + r6");
}
@@ -464,14 +464,14 @@ sub run_tests {
my $y = 'Y';
my $yy = (qw(d e f))[$i];
my $rr5 = $rr5[$i];
- tok("$x$yy" ne "ad", "A$x$yy-C$x" =~ $rr5,
+ tok($bc, "A$x$yy-C$x" =~ $rr5,
"[$x-$yy] literal qr + r5, outside");
tok(1, "A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})/,
"[$x-$yy] literal qr + r5 + lit, outside");
my $rr6 = $rr6[$i];
push @rr6, $rr6;
- tok("$x$yy" ne "ad", "A$x$yy-$x-C$x" =~ $rr6,
+ tok($bc, "A$x$yy-$x-C$x" =~ $rr6,
"[$x-$yy] literal qr + r6, outside");
tok(1, "A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/,
"[$x-$yy] literal qr + r6 +lit, outside");