summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2012-03-18 15:53:40 +0000
committerDavid Mitchell <davem@iabyn.com>2012-06-13 13:32:50 +0100
commitd24ca0c5f11250dcd2552c84a048bda5786ba8d1 (patch)
treefcb4bd939264b649391c45a71cca9af257d5c6dc /regexec.c
parente485beb85b12595f4a784d37e5f42d36644128ba (diff)
downloadperl-d24ca0c5f11250dcd2552c84a048bda5786ba8d1.tar.gz
Fix up runtime regex codeblocks.
The previous commits in this branch have brought literal code blocks into the New World Order; now do the same for runtime blocks, i.e. those needing "use re 'eval'". The main user-visible changes from this commit are that: * the code is now fully parsed, rather than needing balanced {}'s; i.e. this now works: my $code = q[ (?{ $a = '{' }) ]; use re 'eval'; /$code/ * warnings and errors are now reported as coming from "(eval NNN)" rather than "(re_eval NNN)" (although see the next commit for some fixups to that). Indeed, the string "re_eval" has been expunged from the source and documentation. The big internal difference is that the sv_compile_2op() and sv_compile_2op_is_broken() functions are no longer used, and will be removed shortly. It works by the regex compiler detecting the presence of run-time code blocks, and feeding the whole pattern string back into the parser (where the run-time blocks are now seen as compile-time), then extracting out any compiled code blocks and adding them to the mix. For example, in the following: $c = '(?{"runtime"})d'; use re 'eval'; /a(?{"literal"})\b'$c/ At the point the regex compiler is called, the perl parser will already have compiled the literal code block and presented it to the regex engine. The engine examines the pattern string, sees two '(?{', but only one accounted for by the parser, and so constructs a short string to be evalled: based on the pattern, but with literal code-blocks blanked out, and \ and ' escaped. In the above example, the pattern string is a(?{"literal"})\b'(?{"runtime"})d and we call eval_sv() with an SV containing the text qr'a \\b\'(?{"runtime"})d' The returned qr will contain the new code-block (and associated CV and pad) which can be extracted and added to the list of compiled code blocks of the original pattern. 
Note that with this scheme, the requirement for "use re 'eval'" is easily determined, and no longer requires all the pp_regcreset / PL_reginterp_cnt machinery, which will be removed shortly. Two subtleties of this scheme are that normally, \\ isn't collapsed into \ for literal regexes (unlike literal strings), and hints aren't inherited when using eval_sv(). We get round both of these by adding and setting a new flag, PL_reg_state.re_reparsing, which indicates that we are refeeding a pattern into the perl parser.
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c11
1 files changed, 5 insertions, 6 deletions
diff --git a/regexec.c b/regexec.c
index 642b48f37b..f94d15adce 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4267,6 +4267,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
* variable.
*/
Copy(&PL_reg_state, &saved_state, 1, struct re_save_state);
+ PL_reg_state.re_reparsing = FALSE;
n = ARG(scan);
if (rexi->data->what[n] == 'r') { /* code from an external qr */
@@ -4283,16 +4284,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
new_comppad = initial_pad; /* the pad of the current sub */
PL_op = (OP_4tree*)rexi->data->data[n];
}
- else if (rexi->data->what[n] == 'L') { /* literal with own CV */
- new_comppad = (PAD*)AvARRAY(CvPADLIST(rex->qr_anoncv))[1];
- PL_op = (OP_4tree*)rexi->data->data[n];
- }
else {
+ /* literal with own CV */
+ assert(rexi->data->what[n] == 'L');
+ new_comppad = (PAD*)AvARRAY(CvPADLIST(rex->qr_anoncv))[1];
PL_op = (OP_4tree*)rexi->data->data[n];
- new_comppad = (PAD*)rexi->data->data[n + 2];
}
DEBUG_STATE_r( PerlIO_printf(Perl_debug_log,
- " re_eval 0x%"UVxf"\n", PTR2UV(PL_op)) );
+ " re EVAL PL_op=0x%"UVxf"\n", PTR2UV(PL_op)) );
/* wrap the call in two SAVECOMPPADs. This ensures that
* when the save stack is eventually unwound, all the
* accumulated SAVEt_CLEARSV's will be processed with