diff options
author | David Mitchell <davem@iabyn.com> | 2011-11-30 13:40:15 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2011-12-19 15:06:05 +0000 |
commit | 55a50eaac0e929d43694e9a84b983e96b95f2a08 (patch) | |
tree | 555b4dc441bb2b0c791e33adf345cad3d60884ce | |
parent | cff13853c58adf9f0a479ee51925f74395dba8ae (diff) | |
download | perl-55a50eaac0e929d43694e9a84b983e96b95f2a08.tar.gz |
preserve code blocks in interpolated qr//s
This now works:
{ my $x = 1; $r = qr/(??{$x})/ }
my $x = 2;
print "ok\n" if "1" =~ /^$r$/;
When a qr// is interpolated into another pattern, the pattern is still
recompiled using the stringified qr, but now the pre-compiled code blocks
from the qr are reused rather than being re-compiled, so it behaves like a
closure.
Note that this makes some tests in regexp_qr_embed_thr.t fail, due to a
pre-existing threads bug, which can be summarised as:
use threads;
my $s = threads->new(sub { return sub { $::x = 1} })->join;
$s->();
print "\$::x=[$::x]\n";
which prints undef, not 1, since the *::x is cloned into the child thread,
then cloned back into the parent as part of the CV (linked from the pad)
being returned in the join. The cloning/join code isn't clever enough
to realise that the back-cloned *::x is the same as the original *::x, so
the main thread ends up with two copies.
This manifests itself in the re tests as
my $re = threads->new( sub { qr/(?{$::x = 1 })/ })->join();
where, since the returned qr// is now a closure, it suffers from the same
glob duplication in the parent.
So I've disabled 4 re_tests tests under threads for now.
-rw-r--r-- | regcomp.c | 77 | ||||
-rw-r--r-- | regcomp.h | 1 | ||||
-rw-r--r-- | regexec.c | 12 | ||||
-rw-r--r-- | regexp.h | 1 | ||||
-rw-r--r-- | t/re/pat_re_eval.t | 43 | ||||
-rw-r--r-- | t/re/re_tests | 11 | ||||
-rw-r--r-- | t/re/reg_eval_scope.t | 5 |
7 files changed, 127 insertions, 23 deletions
@@ -4703,13 +4703,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, for (svp = patternp; svp < patternp + pat_count; svp++) { SV *sv, *msv = *svp; + SV *rx; bool code = 0; if (o) { if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) { + assert(n < pRExC_state->num_code_blocks); + pRExC_state->code_blocks[n].start = SvCUR(pat); + pRExC_state->code_blocks[n].block = o; + pRExC_state->code_blocks[n].src_regex = NULL; n++; - assert(n <= pRExC_state->num_code_blocks); - pRExC_state->code_blocks[n-1].start = SvCUR(pat); - pRExC_state->code_blocks[n-1].block = o; code = 1; o = o->op_sibling; /* skip CONST */ assert(o); @@ -4717,6 +4719,40 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, o = o->op_sibling;; } + /* extract any code blocks within any embedded qr//'s */ + rx = msv; + if (SvROK(rx)) + rx = SvRV(rx); + if (SvTYPE(rx) == SVt_REGEXP + && RX_ENGINE((REGEXP*)rx) == RE_ENGINE_PTR) + { + + RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri); + if (ri->num_code_blocks) { + int i; + Renew(pRExC_state->code_blocks, + pRExC_state->num_code_blocks + ri->num_code_blocks, + struct reg_code_block); + pRExC_state->num_code_blocks += ri->num_code_blocks; + for (i=0; i < ri->num_code_blocks; i++) { + struct reg_code_block *src, *dst; + STRLEN offset = SvCUR(pat) + + ((struct regexp *)SvANY(rx))->pre_prefix; + assert(n < pRExC_state->num_code_blocks); + src = &ri->code_blocks[i]; + dst = &pRExC_state->code_blocks[n]; + dst->start = src->start + offset; + dst->end = src->end + offset; + dst->block = src->block; + dst->src_regex = (REGEXP*) SvREFCNT_inc( (SV*) + src->src_regex + ? src->src_regex + : (REGEXP*)rx); + n++; + } + } + } + if ((SvAMAGIC(pat) || SvAMAGIC(msv)) && (sv = amagic_call(pat, msv, concat_amg, AMGf_assign))) { @@ -4783,6 +4819,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, assert(i+1 < pRExC_state->num_code_blocks); pRExC_state->code_blocks[++i].start = SvCUR(pat); pRExC_state->code_blocks[i].block = o; + pRExC_state->code_blocks[i].src_regex = NULL; is_code = 1; } } @@ -4795,7 +4832,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count, exp = SvPV_nomg(pat, plen); - if (eng && eng != &PL_core_reg_engine) { + if (eng && eng != RE_ENGINE_PTR) { if ((SvUTF8(pat) && IN_BYTES) || SvGMAGICAL(pat) || SvAMAGIC(pat)) { @@ -7515,16 +7552,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) - RExC_start) ) { /* this is a pre-compiled literal (?{}) */ - RExC_parse = RExC_start + - pRExC_state->code_blocks[pRExC_state->code_index].end; + struct reg_code_block *cb = + &pRExC_state->code_blocks[pRExC_state->code_index]; + RExC_parse = RExC_start + cb->end; if (SIZE_ONLY) RExC_seen_evals++; else { - OP *o = - pRExC_state->code_blocks[pRExC_state->code_index].block; - n = add_data(pRExC_state, 1, + OP *o = cb->block; + if (cb->src_regex) { + n = add_data(pRExC_state, 2, "rl"); + RExC_rxi->data->data[n] = + (void*)SvREFCNT_inc((SV*)cb->src_regex); + RExC_rxi->data->data[n+1] = (void*)o->op_next; + } + else { + n = add_data(pRExC_state, 1, (RExC_flags & PMf_HAS_CV) ? "L" : "l"); - RExC_rxi->data->data[n] = (void*)o->op_next; + RExC_rxi->data->data[n] = (void*)o->op_next; + } } pRExC_state->code_index++; } @@ -12208,8 +12253,12 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) if (ri->u.offsets) Safefree(ri->u.offsets); /* 20010421 MJD */ #endif - if (ri->code_blocks) + if (ri->code_blocks) { + int n; + for (n = 0; n < ri->num_code_blocks; n++) + SvREFCNT_dec(ri->code_blocks[n].src_regex); Safefree(ri->code_blocks); + } if (ri->data) { int n = ri->data->count; @@ -12221,6 +12270,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx) /* If you add a ->what type here, update the comment in regcomp.h */ switch (ri->data->what[n]) { case 'a': + case 'r': case 's': case 'S': case 'u': @@ -12443,10 +12493,14 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) reti->num_code_blocks = ri->num_code_blocks; if (ri->code_blocks) { + int n; Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block, struct reg_code_block); Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks, struct reg_code_block); + for (n = 0; n < ri->num_code_blocks; n++) + reti->code_blocks[n].src_regex = (REGEXP*) + sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param); } else reti->code_blocks = NULL; @@ -12469,6 +12523,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param) /* legal options are one of: sSfpontTua see also regcomp.h and pregfree() */ case 'a': /* actually an AV, but the dup function is identical. */ + case 'r': case 's': case 'S': case 'p': /* actually an AV, but the dup function is identical. */ @@ -539,6 +539,7 @@ END_EXTERN_C * n - Root of op tree for (?{EVAL}) item * o - Start op for (?{EVAL}) item * p - Pad for (?{EVAL}) item + * r - pointer to an embedded code-containing qr, e.g. /ab$qr/ * s - swash for Unicode-style character class, and the multicharacter * strings resulting from casefolding the single-character entries * in the character class @@ -4256,7 +4256,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) Copy(&PL_reg_state, &saved_state, 1, struct re_save_state); n = ARG(scan); - if (rexi->data->what[n] == 'l') { /* literal code */ + if (rexi->data->what[n] == 'r') { /* code from an external qr */ + /* XXX assumes pad depth is 1; this isn't necessarily + * the case with recursive qr//'s */ + new_comppad = (PAD*)AvARRAY(CvPADLIST( + ((struct regexp *)SvANY( + (REGEXP*)(rexi->data->data[n]) + ))->qr_anoncv + ))[1]; + PL_op = (OP_4tree*)rexi->data->data[n+1]; + } + else if (rexi->data->what[n] == 'l') { /* literal code */ new_comppad = initial_pad; /* the pad of the current sub */ PL_op = (OP_4tree*)rexi->data->data[n]; } @@ -62,6 +62,7 @@ struct reg_code_block { STRLEN start; STRLEN end; OP *block; + REGEXP *src_regex; }; diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t index cf38bc9a71..3042b814ed 100644 --- a/t/re/pat_re_eval.t +++ b/t/re/pat_re_eval.t @@ -22,7 +22,7 @@ BEGIN { } -plan tests => 242; # Update this when adding/deleting tests. +plan tests => 245; # Update this when adding/deleting tests. run_tests() unless caller; @@ -59,7 +59,8 @@ sub run_tests { no re "eval"; undef $@; - my $match = eval { /$a$c$a/ }; + my $d = '(?{1})'; + my $match = eval { /$a$c$a$d/ }; ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message); is($b, '14', $message); @@ -528,6 +529,44 @@ sub run_tests { ok("bcd" =~ $r, "qr with run-time elements and code block"); } + # check that cascaded embedded regexes all see their own lexical + # environment + + { + my ($r1, $r2, $r3, $r4); + my ($x1, $x2, $x3, $x4) = (5,6,7,8); + { my $x1 = 1; $r1 = qr/A(??{$x1})/; } + { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; } + { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; } + { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; } + ok("A1234" =~ /^$r4$/, "cascaded qr"); + } + + # and again, but in a loop, with no external references + # being maintained to the qr's + + { + my $r = 'A'; + for my $x (1..4) { + $r = qr/$r(??{$x})/; + } + my $x = 5; + ok("A1234" =~ /^$r$/, "cascaded qr loop"); + } + + + # and again, but compiling the qrs in an eval so there + # aren't even refs to the qrs from any ops + + { + my $r = 'A'; + for my $x (1..4) { + $r = eval q[ qr/$r(??{$x})/; ]; + } + my $x = 5; + ok("A1234" =~ /^$r$/, "cascaded qr loop"); + } + # forward declared subs should Do The Right Thing with any anon CVs # within them (i.e. pad_fixup_inner_anons() should work) diff --git a/t/re/re_tests b/t/re/re_tests index 29da142a86..6aca523281 100644 --- a/t/re/re_tests +++ b/t/re/re_tests @@ -537,11 +537,10 @@ a(?{})b cabd y $& ab a(?{f()+ - c - Missing right curly or square bracket a(?{{1}+ - c - Missing right curly or square bracket a(?{}})b - c - -# XXX tmp disable this test - doesn't work for embedded qr// yet -#a(?{"{"})b ab y - - +a(?{"{"})b ab y - - a(?{"\{"})b cabd y $& ab a(?{"{"}})b - c - Sequence (?{...}) not terminated with ')' -a(?{$::bl="\{"}).b caxbd y $::bl { +a(?{$::bl="\{"}).b caxbd t $::bl { x(~~)*(?:(?:F)?)? x~~ y - - ^a(?#xxx){3}c aaac y $& aaac '^a (?#xxx) (?#yyy) {3}c'x aaac y $& aaac @@ -606,8 +605,8 @@ $(?<=^(a)) a y $1 a ([\w:]+::)?(\w+)$ abcd y $1-$2 -abcd ([\w:]+::)?(\w+)$ xy:z:::abcd y $1-$2 xy:z:::-abcd ^[^bcd]*(c+) aexycd y $1 c -(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a}) yaaxxaaaacd y $b 3 -(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a}) yaaxxaaaacd y $b 4 +(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a}) yaaxxaaaacd t $b 3 +(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a}) yaaxxaaaacd t $b 4 (>a+)ab aaab n - - (?>a+)b aaab y - - ([[:]+) a:[b]: y $1 :[ @@ -851,7 +850,7 @@ abb$ b\nca n - - 'abb$'m b\nca n - - (^|x)(c) ca y $2 c a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz x n - - -a(?{$a=2;$b=3;($b)=$a})b yabz y $b 2 +a(?{$a=2;$b=3;($b)=$a})b yabz t $b 2 round\(((?>[^()]+))\) _I(round(xs * sz),1) y $1 xs * sz '((?x:.) )' x y $1- x - '((?-x:.) )'x x y $1- x- diff --git a/t/re/reg_eval_scope.t b/t/re/reg_eval_scope.t index 1369a5fa61..51d41ec988 100644 --- a/t/re/reg_eval_scope.t +++ b/t/re/reg_eval_scope.t @@ -88,7 +88,7 @@ on; fresh_perl_is <<'CODE', '123123', {}, for my $x(1..3) { - push @regexps = qr/(?{ print $x })a/; + push @regexps, qr/(?{ print $x })a/; } "a" =~ $_ for @regexps; "ba" =~ /b$_/ for @regexps; @@ -102,8 +102,7 @@ is $pack, 'foo', 'qr// inherits package'; "a" =~ do { use re "/x"; qr/(?{ $::re = qr-- })a/ }; is $re, '(?^x:)', 'qr// inherits pragmata'; -on; - +$::pack = ''; "ba" =~ /b${\do { package baz; qr|(?{ $::pack = __PACKAGE__ })a| }}/; is $pack, 'baz', '/text$qr/ inherits package'; "ba" =~ m+b${\do { use re "/i"; qr|(?{ $::re = qr-- })a| }}+; |