summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2011-11-30 13:40:15 +0000
committerDavid Mitchell <davem@iabyn.com>2011-12-19 15:06:05 +0000
commit55a50eaac0e929d43694e9a84b983e96b95f2a08 (patch)
tree555b4dc441bb2b0c791e33adf345cad3d60884ce
parentcff13853c58adf9f0a479ee51925f74395dba8ae (diff)
downloadperl-55a50eaac0e929d43694e9a84b983e96b95f2a08.tar.gz
preserve code blocks in interpolated qr//s
This now works: { my $x = 1; $r = qr/(??{$x})/ } my $x = 2; print "ok\n" if "1" =~ /^$r$/; When a qr// is interpolated into another pattern, the pattern is still recompiled using the stringified qr, but now the pre-compiled code blocks from the qr are reused rather than being re-compiled, so it behaves like a closure. Note that this makes some tests in regexp_qr_embed_thr.t fail, due to a pre-existing threads bug, which can be summarised as: use threads; my $s = threads->new(sub { return sub { $::x = 1} })->join; $s->(); print "\$::x=[$::x]\n"; which prints undef, not 1, since the *::x is cloned into the child thread, then cloned back into the parent as part of the CV (linked from the pad) being returned in the join. The cloning/join code isn't clever enough to realise that the back-cloned *::x is the same as the original *::x, so the main thread ends up with two copies. This manifests itself in the re tests as my $re = threads->new( sub { qr/(?{$::x = 1 })/ })->join(); where, since the returned qr// is now a closure, it suffers from the same glob duplication in the parent. So I've disabled 4 re_tests tests under threads for now.
-rw-r--r--regcomp.c77
-rw-r--r--regcomp.h1
-rw-r--r--regexec.c12
-rw-r--r--regexp.h1
-rw-r--r--t/re/pat_re_eval.t43
-rw-r--r--t/re/re_tests11
-rw-r--r--t/re/reg_eval_scope.t5
7 files changed, 127 insertions, 23 deletions
diff --git a/regcomp.c b/regcomp.c
index ef19a0cac1..31e8431163 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -4703,13 +4703,15 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
for (svp = patternp; svp < patternp + pat_count; svp++) {
SV *sv, *msv = *svp;
+ SV *rx;
bool code = 0;
if (o) {
if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+ assert(n < pRExC_state->num_code_blocks);
+ pRExC_state->code_blocks[n].start = SvCUR(pat);
+ pRExC_state->code_blocks[n].block = o;
+ pRExC_state->code_blocks[n].src_regex = NULL;
n++;
- assert(n <= pRExC_state->num_code_blocks);
- pRExC_state->code_blocks[n-1].start = SvCUR(pat);
- pRExC_state->code_blocks[n-1].block = o;
code = 1;
o = o->op_sibling; /* skip CONST */
assert(o);
@@ -4717,6 +4719,40 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
o = o->op_sibling;;
}
+ /* extract any code blocks within any embedded qr//'s */
+ rx = msv;
+ if (SvROK(rx))
+ rx = SvRV(rx);
+ if (SvTYPE(rx) == SVt_REGEXP
+ && RX_ENGINE((REGEXP*)rx) == RE_ENGINE_PTR)
+ {
+
+ RXi_GET_DECL(((struct regexp*)SvANY(rx)), ri);
+ if (ri->num_code_blocks) {
+ int i;
+ Renew(pRExC_state->code_blocks,
+ pRExC_state->num_code_blocks + ri->num_code_blocks,
+ struct reg_code_block);
+ pRExC_state->num_code_blocks += ri->num_code_blocks;
+ for (i=0; i < ri->num_code_blocks; i++) {
+ struct reg_code_block *src, *dst;
+ STRLEN offset = SvCUR(pat)
+ + ((struct regexp *)SvANY(rx))->pre_prefix;
+ assert(n < pRExC_state->num_code_blocks);
+ src = &ri->code_blocks[i];
+ dst = &pRExC_state->code_blocks[n];
+ dst->start = src->start + offset;
+ dst->end = src->end + offset;
+ dst->block = src->block;
+ dst->src_regex = (REGEXP*) SvREFCNT_inc( (SV*)
+ src->src_regex
+ ? src->src_regex
+ : (REGEXP*)rx);
+ n++;
+ }
+ }
+ }
+
if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
(sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
{
@@ -4783,6 +4819,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
assert(i+1 < pRExC_state->num_code_blocks);
pRExC_state->code_blocks[++i].start = SvCUR(pat);
pRExC_state->code_blocks[i].block = o;
+ pRExC_state->code_blocks[i].src_regex = NULL;
is_code = 1;
}
}
@@ -4795,7 +4832,7 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
exp = SvPV_nomg(pat, plen);
- if (eng && eng != &PL_core_reg_engine) {
+ if (eng && eng != RE_ENGINE_PTR) {
if ((SvUTF8(pat) && IN_BYTES)
|| SvGMAGICAL(pat) || SvAMAGIC(pat))
{
@@ -7515,16 +7552,24 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
- RExC_start)
) {
/* this is a pre-compiled literal (?{}) */
- RExC_parse = RExC_start +
- pRExC_state->code_blocks[pRExC_state->code_index].end;
+ struct reg_code_block *cb =
+ &pRExC_state->code_blocks[pRExC_state->code_index];
+ RExC_parse = RExC_start + cb->end;
if (SIZE_ONLY)
RExC_seen_evals++;
else {
- OP *o =
- pRExC_state->code_blocks[pRExC_state->code_index].block;
- n = add_data(pRExC_state, 1,
+ OP *o = cb->block;
+ if (cb->src_regex) {
+ n = add_data(pRExC_state, 2, "rl");
+ RExC_rxi->data->data[n] =
+ (void*)SvREFCNT_inc((SV*)cb->src_regex);
+ RExC_rxi->data->data[n+1] = (void*)o->op_next;
+ }
+ else {
+ n = add_data(pRExC_state, 1,
(RExC_flags & PMf_HAS_CV) ? "L" : "l");
- RExC_rxi->data->data[n] = (void*)o->op_next;
+ RExC_rxi->data->data[n] = (void*)o->op_next;
+ }
}
pRExC_state->code_index++;
}
@@ -12208,8 +12253,12 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
if (ri->u.offsets)
Safefree(ri->u.offsets); /* 20010421 MJD */
#endif
- if (ri->code_blocks)
+ if (ri->code_blocks) {
+ int n;
+ for (n = 0; n < ri->num_code_blocks; n++)
+ SvREFCNT_dec(ri->code_blocks[n].src_regex);
Safefree(ri->code_blocks);
+ }
if (ri->data) {
int n = ri->data->count;
@@ -12221,6 +12270,7 @@ Perl_regfree_internal(pTHX_ REGEXP * const rx)
/* If you add a ->what type here, update the comment in regcomp.h */
switch (ri->data->what[n]) {
case 'a':
+ case 'r':
case 's':
case 'S':
case 'u':
@@ -12443,10 +12493,14 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
reti->num_code_blocks = ri->num_code_blocks;
if (ri->code_blocks) {
+ int n;
Newxc(reti->code_blocks, ri->num_code_blocks, struct reg_code_block,
struct reg_code_block);
Copy(ri->code_blocks, reti->code_blocks, ri->num_code_blocks,
struct reg_code_block);
+ for (n = 0; n < ri->num_code_blocks; n++)
+ reti->code_blocks[n].src_regex = (REGEXP*)
+ sv_dup_inc((SV*)(ri->code_blocks[n].src_regex), param);
}
else
reti->code_blocks = NULL;
@@ -12469,6 +12523,7 @@ Perl_regdupe_internal(pTHX_ REGEXP * const rx, CLONE_PARAMS *param)
/* legal options are one of: sSfpontTua
see also regcomp.h and pregfree() */
case 'a': /* actually an AV, but the dup function is identical. */
+ case 'r':
case 's':
case 'S':
case 'p': /* actually an AV, but the dup function is identical. */
diff --git a/regcomp.h b/regcomp.h
index fbf319565e..0f4554039d 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -539,6 +539,7 @@ END_EXTERN_C
* n - Root of op tree for (?{EVAL}) item
* o - Start op for (?{EVAL}) item
* p - Pad for (?{EVAL}) item
+ * r - pointer to an embedded code-containing qr, e.g. /ab$qr/
* s - swash for Unicode-style character class, and the multicharacter
* strings resulting from casefolding the single-character entries
* in the character class
diff --git a/regexec.c b/regexec.c
index 6b1dc25602..b301db2d55 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4256,7 +4256,17 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
n = ARG(scan);
- if (rexi->data->what[n] == 'l') { /* literal code */
+ if (rexi->data->what[n] == 'r') { /* code from an external qr */
+ /* XXX assumes pad depth is 1; this isn't necessarily
+ * the case with recursive qr//'s */
+ new_comppad = (PAD*)AvARRAY(CvPADLIST(
+ ((struct regexp *)SvANY(
+ (REGEXP*)(rexi->data->data[n])
+ ))->qr_anoncv
+ ))[1];
+ PL_op = (OP_4tree*)rexi->data->data[n+1];
+ }
+ else if (rexi->data->what[n] == 'l') { /* literal code */
new_comppad = initial_pad; /* the pad of the current sub */
PL_op = (OP_4tree*)rexi->data->data[n];
}
diff --git a/regexp.h b/regexp.h
index dca59dad4a..1fb10bf02a 100644
--- a/regexp.h
+++ b/regexp.h
@@ -62,6 +62,7 @@ struct reg_code_block {
STRLEN start;
STRLEN end;
OP *block;
+ REGEXP *src_regex;
};
diff --git a/t/re/pat_re_eval.t b/t/re/pat_re_eval.t
index cf38bc9a71..3042b814ed 100644
--- a/t/re/pat_re_eval.t
+++ b/t/re/pat_re_eval.t
@@ -22,7 +22,7 @@ BEGIN {
}
-plan tests => 242; # Update this when adding/deleting tests.
+plan tests => 245; # Update this when adding/deleting tests.
run_tests() unless caller;
@@ -59,7 +59,8 @@ sub run_tests {
no re "eval";
undef $@;
- my $match = eval { /$a$c$a/ };
+ my $d = '(?{1})';
+ my $match = eval { /$a$c$a$d/ };
ok($@ && $@ =~ /Eval-group not allowed/ && !$match, $message);
is($b, '14', $message);
@@ -528,6 +529,44 @@ sub run_tests {
ok("bcd" =~ $r, "qr with run-time elements and code block");
}
+ # check that cascaded embedded regexes all see their own lexical
+ # environment
+
+ {
+ my ($r1, $r2, $r3, $r4);
+ my ($x1, $x2, $x3, $x4) = (5,6,7,8);
+ { my $x1 = 1; $r1 = qr/A(??{$x1})/; }
+ { my $x2 = 2; $r2 = qr/$r1(??{$x2})/; }
+ { my $x3 = 3; $r3 = qr/$r2(??{$x3})/; }
+ { my $x4 = 4; $r4 = qr/$r3(??{$x4})/; }
+ ok("A1234" =~ /^$r4$/, "cascaded qr");
+ }
+
+ # and again, but in a loop, with no external references
+ # being maintained to the qr's
+
+ {
+ my $r = 'A';
+ for my $x (1..4) {
+ $r = qr/$r(??{$x})/;
+ }
+ my $x = 5;
+ ok("A1234" =~ /^$r$/, "cascaded qr loop");
+ }
+
+
+ # and again, but compiling the qrs in an eval so there
+ # aren't even refs to the qrs from any ops
+
+ {
+ my $r = 'A';
+ for my $x (1..4) {
+ $r = eval q[ qr/$r(??{$x})/; ];
+ }
+ my $x = 5;
+ ok("A1234" =~ /^$r$/, "cascaded qr loop");
+ }
+
# forward declared subs should Do The Right Thing with any anon CVs
# within them (i.e. pad_fixup_inner_anons() should work)
diff --git a/t/re/re_tests b/t/re/re_tests
index 29da142a86..6aca523281 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -537,11 +537,10 @@ a(?{})b cabd y $& ab
a(?{f()+ - c - Missing right curly or square bracket
a(?{{1}+ - c - Missing right curly or square bracket
a(?{}})b - c -
-# XXX tmp disable this test - doesn't work for embedded qr// yet
-#a(?{"{"})b ab y - -
+a(?{"{"})b ab y - -
a(?{"\{"})b cabd y $& ab
a(?{"{"}})b - c - Sequence (?{...}) not terminated with ')'
-a(?{$::bl="\{"}).b caxbd y $::bl {
+a(?{$::bl="\{"}).b caxbd t $::bl {
x(~~)*(?:(?:F)?)? x~~ y - -
^a(?#xxx){3}c aaac y $& aaac
'^a (?#xxx) (?#yyy) {3}c'x aaac y $& aaac
@@ -606,8 +605,8 @@ $(?<=^(a)) a y $1 a
([\w:]+::)?(\w+)$ abcd y $1-$2 -abcd
([\w:]+::)?(\w+)$ xy:z:::abcd y $1-$2 xy:z:::-abcd
^[^bcd]*(c+) aexycd y $1 c
-(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a}) yaaxxaaaacd y $b 3
-(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a}) yaaxxaaaacd y $b 4
+(?{$a=2})a*aa(?{local$a=$a+1})k*c(?{$b=$a}) yaaxxaaaacd t $b 3
+(?{$a=2})(a(?{local$a=$a+1}))*aak*c(?{$b=$a}) yaaxxaaaacd t $b 4
(>a+)ab aaab n - -
(?>a+)b aaab y - -
([[:]+) a:[b]: y $1 :[
@@ -851,7 +850,7 @@ abb$ b\nca n - -
'abb$'m b\nca n - -
(^|x)(c) ca y $2 c
a*abc?xyz+pqr{3}ab{2,}xy{4,5}pq{0,6}AB{0,}zz x n - -
-a(?{$a=2;$b=3;($b)=$a})b yabz y $b 2
+a(?{$a=2;$b=3;($b)=$a})b yabz t $b 2
round\(((?>[^()]+))\) _I(round(xs * sz),1) y $1 xs * sz
'((?x:.) )' x y $1- x -
'((?-x:.) )'x x y $1- x-
diff --git a/t/re/reg_eval_scope.t b/t/re/reg_eval_scope.t
index 1369a5fa61..51d41ec988 100644
--- a/t/re/reg_eval_scope.t
+++ b/t/re/reg_eval_scope.t
@@ -88,7 +88,7 @@ on;
fresh_perl_is <<'CODE', '123123', {},
for my $x(1..3) {
- push @regexps = qr/(?{ print $x })a/;
+ push @regexps, qr/(?{ print $x })a/;
}
"a" =~ $_ for @regexps;
"ba" =~ /b$_/ for @regexps;
@@ -102,8 +102,7 @@ is $pack, 'foo', 'qr// inherits package';
"a" =~ do { use re "/x"; qr/(?{ $::re = qr-- })a/ };
is $re, '(?^x:)', 'qr// inherits pragmata';
-on;
-
+$::pack = '';
"ba" =~ /b${\do { package baz; qr|(?{ $::pack = __PACKAGE__ })a| }}/;
is $pack, 'baz', '/text$qr/ inherits package';
"ba" =~ m+b${\do { use re "/i"; qr|(?{ $::re = qr-- })a| }}+;