summaryrefslogtreecommitdiff
path: root/regcomp.c
diff options
context:
space:
mode:
author: David Mitchell <davem@iabyn.com> 2011-11-12 20:51:27 +0000
committer: David Mitchell <davem@iabyn.com> 2012-06-13 13:25:52 +0100
commit: 346d3070ad0aa2f924e07bfdde6bb8809da64e45 (patch)
tree: 30b7f0ab2b93b7eaaa34beb85ce8588872584fd8 /regcomp.c
parent: b1603ef883c1cc16a978ec3c12aa376876ceb8d6 (diff)
download: perl-346d3070ad0aa2f924e07bfdde6bb8809da64e45.tar.gz
Handle literal code blocks in runtime regexes
In the following types of regex:

    /$runtime(?{...})/
    qr/$runtime(?{...})/

make it so that the code block is compiled at the same time that the surrounding code is compiled, and is then incorporated, rather than re-compiled, when the regex source is assembled and compiled at runtime.

This fixes a bunch of closure-related TODO tests.

Note that this doesn't yet handle the cases where $runtime contains:

    $runtime = qr/...(?{...})/; # block will be stringified and recompiled
    $runtime = '(?{...})'; # block compiled the old way, with matching nesting of {} required

It also doesn't yet handle the case where the pattern getting compiled is upgraded to utf8 and so is restarted.

Note that this is rather complex, because in something like

    $str =~ qr/$a(?{...})$b[1]/

there are four separate phases:

* perl compile time; we also compile the code block at the same time, but within a separate anon CV (with a separate pad);
* at run time, we execute the code that generates the list of SVs (i.e. $a, $b[1] etc), but have to execute them within the context of the anon sub, since that's what they were compiled in; we then have to concat the arguments, while remembering which were literal code blocks;
* then qr// clones the compiled regex, and clones the anon CV at the same time;
* finally, the pattern is executed.

Through all this we have to ensure that the code blocks and associated anon CV and pad get preserved and incorporated into the right places for eventual use.
The changes in this commit build upon the work in the previous few commits, and work by:

* at (perl) compile time, in pmruntime(), the anon CV (if any) associated with a qr//, as well as being referred to by the op_targ of the anoncode op, is also made the targ of the regcomp op;
* at pattern assembly and compile time:
    * Perl_re_op_compile() takes the list of SVs gathered by pp_regcomp(), along with the op tree (from op_code_list) that was used to generate those SVs (as well as containing the individual DO blocks), and concatenates them to get a final pattern source string, while noting the start and end positions of any literal (?{..})'s, and which block they must correspond to;
    * after compilation, pp_regcomp() then uses op_targ to locate the anon CV and store a pointer to it in the regex.

qr// instantiation and execution work unchanged.
Diffstat (limited to 'regcomp.c')
-rw-r--r-- regcomp.c 38
1 files changed, 35 insertions, 3 deletions
diff --git a/regcomp.c b/regcomp.c
index 6c16e8ecae..25ffc005cf 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -5189,18 +5189,50 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int pat_count,
}
if (pat_count > 1) {
- /* concat multiple args */
+ /* concat multiple args and find any code block indexes */
+
+ OP *o = NULL;
+ int n = 0;
+
+ if (pRExC_state->num_code_blocks) {
+ o = cLISTOPx(expr)->op_first;
+ assert(o->op_type == OP_PUSHMARK);
+ o = o->op_sibling;
+ }
- pRExC_state->num_code_blocks = 0; /* XXX tmp */
pat = newSVpvn("", 0);
SAVEFREESV(pat);
for (svp = patternp; svp < patternp + pat_count; svp++) {
SV *sv, *msv = *svp;
+ bool code = 0;
+ if (o) {
+ if (o->op_type == OP_NULL && (o->op_flags & OPf_SPECIAL)) {
+ n++;
+ assert(n <= pRExC_state->num_code_blocks);
+ pRExC_state->code_blocks[n-1].start = SvCUR(pat);
+ pRExC_state->code_blocks[n-1].block = o;
+ code = 1;
+ o = o->op_sibling; /* skip CONST */
+ assert(o);
+ }
+ o = o->op_sibling;;
+ }
+
if ((SvAMAGIC(pat) || SvAMAGIC(msv)) &&
(sv = amagic_call(pat, msv, concat_amg, AMGf_assign)))
+ {
sv_setsv(pat, sv);
- else
+ /* overloading involved: all bets are off over literal
+ * code. Pretend we haven't seen it */
+ pRExC_state->num_code_blocks -= n;
+ n = 0;
+
+ }
+ else {
sv_catsv_nomg(pat, msv);
+ if (code)
+ pRExC_state->code_blocks[n-1].end = SvCUR(pat);
+ }
}
SvSETMAGIC(pat);
}