diff options
author | David Mitchell <davem@iabyn.com> | 2011-11-12 20:51:27 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2011-12-19 15:06:04 +0000 |
commit | da8e5eefb08d803d1779699680ec94dc430a4137 (patch) | |
tree | bd764213bc54806ecdc4ce754f0012a5e62895d5 | |
parent | f04fdf0b48a5778d12e39a4b8eb3845f26e351e7 (diff) | |
download | perl-da8e5eefb08d803d1779699680ec94dc430a4137.tar.gz |
Handle literal code blocks in runtime regexes
In the following types of regex:
/$runtime(?{...})/
qr/$runtime(?{...})/
make it so that the code block is compiled at the same time that the
surrounding code is compiled, then is incorporated, rather than
re-compiled, when the regex source is assembled and compiled at runtime.
This fixes a bunch of closure-related TODO tests.
Note that this still doesn't yet handle the cases where $runtime contains:
$runtime = qr/...(?{...})/; # block will be stringified and recompiled
$runtime = '(?{...})'; # block compiled the old way, with
matching nesting of {} required
It also doesn't yet handle the case where the pattern getting compiled is
upgraded to utf8 and so is restarted.
Note that this is rather complex, because in something like
$str =~ qr/$a(?{...})$b[1]/
there are four separate phases
* perl compile time; we also compile the code block at the same time,
but within a separate anon CV (with a separate pad)
* at run time, we execute the code that generates the list of SVs
(i.e. $a, $b[1] etc), but have to execute them within the context of the
anon sub, since that's what they were compiled in; we then have to
concat the arguments, while remembering which were literal code blocks;
* then qr// clones the compiled regex, and clones the anon CV at the same
time;
* finally, the pattern is executed.
Through all this we have to ensure that the code blocks and associated
anon CV and pad get preserved and incorporated into the right places
for eventual use.
The changes in this commit build upon the work in the previous few
commits, and work by:
* at (perl) compile time, in pmruntime(), the anon CV (if any) associated
with a qr//, as well as being referred to by the op_targ of the
anoncode op, is also made the targ of the regcomp op;
* at pattern assembly and compile time,
* Perl_re_op_compile() takes the list of SVs gathered by pp_regcomp(),
along with the op tree (from op_code_list) that was used to generate
those SVs (as well as containing the individual DO blocks), and
concatenates them to get a final pattern source string, while
noting the start and end positions of any literal (?{..})'s,
and which block they must correspond to.
* after compilation, pp_regcomp() then uses op_targ to locate
the anon CV and store a pointer to it in the regex.
qr// instantiation and execution work unchanged.
-rw-r--r-- | op.c | 11 | ||||
-rw-r--r-- | pp_ctl.c | 5 | ||||
-rw-r--r-- | regcomp.c | 38 | ||||
-rw-r--r-- | t/re/pat_re_eval.t | 12 |
4 files changed, 54 insertions, 12 deletions
@@ -4402,6 +4402,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) else { /* runtime pattern: build chain of regcomp etc ops */ bool reglist; + PADOFFSET cv_targ = 0; reglist = isreg && expr->op_type == OP_LIST; if (reglist) @@ -4454,8 +4455,13 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) */ SvREFCNT_inc_simple_void(PL_compcv); - expr = list(force_list(newUNOP(OP_ENTERSUB, 0, - scalar(newANONATTRSUB(floor, NULL, NULL, expr))))); + /* these lines are just an unrolled newANONATTRSUB */ + expr = newSVOP(OP_ANONCODE, 0, + MUTABLE_SV(newATTRSUB(floor, 0, NULL, NULL, expr))); + cv_targ = expr->op_targ; + expr = newUNOP(OP_REFGEN, 0, expr); + + expr = list(force_list(newUNOP(OP_ENTERSUB, 0, scalar(expr)))); } NewOp(1101, rcop, 1, LOGOP); @@ -4467,6 +4473,7 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) | (reglist ? OPf_STACKED : 0); rcop->op_private = 0; rcop->op_other = o; + rcop->op_targ = cv_targ; /* /$x/ may cause an eval, since $x might be qr/(?{..})/ */ if (PL_hints & HINT_RE_EVAL) PL_cv_has_eval = 1; @@ -114,7 +114,10 @@ PP(pp_regcomp) PL_reginterp_cnt = (I32_MAX>>1); /* Mark as safe. */ new_re = re_op_compile(args, nargs, pm->op_code_list, eng, re, - &is_bare_re, (pm->op_pmflags & RXf_PMf_COMPILETIME)); + &is_bare_re, + (pm->op_pmflags & (RXf_PMf_COMPILETIME|PMf_HAS_CV))); + if (pm->op_pmflags & PMf_HAS_CV) + ((struct regexp *)SvANY(new_re))->qr_anoncv = PAD_SV(PL_op->op_targ); if (is_bare_re) { REGEXP *tmp; @@ -4708,18 +4708,50 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, } if (pat_count > 1) { - /* concat multiple args */ + /* concat multiple args and find any code block indexes */ + + OP *o = NULL; + int n = 0; + + if (pRExC_state->num_code_blocks) { + o = cLISTOPx(expr)->op_first; + assert(o->op_type == OP_PUSHMARK); + o = o->op_sibling; + } - pRExC_state->num_code_blocks = 0; /* XXX tmp */ pat = newSVpvn("", 0); SAVEFREESV(pat); for (svp = patternp; svp < patternp + pat_count; svp++) { SV *sv, *msv = *svp; + bool code = 0; + if (o) { + if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) { + n++; + assert(n <= pRExC_state->num_code_blocks); + pRExC_state->code_blocks[n-1].start = SvCUR(pat); + pRExC_state->code_blocks[n-1].block = o; + code = 1; + o = o->op_sibling; /* skip CONST */ + assert(o); + } + o = o->op_sibling;; + } + if ((SvAMAGIC(pat) || SvAMAGIC(msv)) && (sv = amagic_call(pat, msv, concat_amg, AMGf_assign))) + { sv_setsv(pat, sv); - else + /* overloading involved: all bets are off over literal + * code. Pretend we haven't seen it */ + pRExC_state->num_code_blocks -= n; + n = 0; + + } + else { sv_catsv_nomg(pat, msv); + if (code) + pRExC_state->code_blocks[n-1].end = SvCUR(pat); + } } SvSETMAGIC(pat); } diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index 2a16027b3d..4eef9d166f 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -400,12 +400,12 @@ sub run_tests { # literal qr code only created once, embedded with text $cr2 //= qr/B(??{$x})$/; - tok(0, "ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text"); + ok("ABa" =~ /^A$cr2/, "[$x] literal qr once embedded text"); # literal qr code only created once, embedded with text + lit code $cr3 //= qr/C(??{$x})$/; - tok($bc, "A$x-BCa" =~ /^A(??{$x})-B$cr3/, + ok("A$x-BCa" =~ /^A(??{$x})-B$cr3/, "[$x] literal qr once embedded text + lit code"); # literal qr code only created once, embedded with text + run code @@ -451,12 +451,12 @@ sub run_tests { my $rr5 = qr/^A(??{"$x$y"})-$r5/; push @rr5, $rr5; - tok("$x$y" ne "ad", "A$x$y-C$x" =~ $rr5, + tok($bc, "A$x$y-C$x" =~ $rr5, "[$x-$y] literal qr + r5"); my $rr6 = qr/^A(??{"$x$y"})-$r6/; push @rr6, $rr6; - tok("$x$y" ne "ad", "A$x$y-$x-C$x" =~ $rr6, + tok($bc, "A$x$y-$x-C$x" =~ $rr6, "[$x-$y] literal qr + r6"); } @@ -464,14 +464,14 @@ sub run_tests { my $y = 'Y'; my $yy = (qw(d e f))[$i]; my $rr5 = $rr5[$i]; - tok("$x$yy" ne "ad", "A$x$yy-C$x" =~ $rr5, + tok($bc, "A$x$yy-C$x" =~ $rr5, "[$x-$yy] literal qr + r5, outside"); tok(1, "A$x$yy-C$x-D$x" =~ /$rr5-D(??{$x})/, "[$x-$yy] literal qr + r5 + lit, outside"); my $rr6 = $rr6[$i]; push @rr6, $rr6; - tok("$x$yy" ne "ad", "A$x$yy-$x-C$x" =~ $rr6, + tok($bc, "A$x$yy-$x-C$x" =~ $rr6, "[$x-$yy] literal qr + r6, outside"); tok(1, "A$x$yy-$x-C$x-D$x" =~ /$rr6-D(??{$x})/, "[$x-$yy] literal qr + r6 +lit, outside"); |