diff options
author | David Mitchell <davem@iabyn.com> | 2013-05-19 09:38:23 +0100 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2013-06-02 22:28:51 +0100 |
commit | 8adc0f72b0398cece49d44d4acc0962d03543ea9 (patch) | |
tree | 44698d175bd412fed0431a239e971e146cac91c2 /regexec.c | |
parent | a46737de7eb272b1765c352593c3e3627bccdad5 (diff) | |
download | perl-8adc0f72b0398cece49d44d4acc0962d03543ea9.tar.gz |
add regmatch_eval_state struct
Replace several PL_reg* vars with a new struct. This is part of the
goal of removing all global regex state.
These particular vars are used in the case of a regex with (?{}) code
blocks. In this case, when the code in a block is called, various bits of
state (such as $1, pos($_)) are temporarily set up, even though the match
has not yet completed.
This involves updating the current PL_curpm to point to a fake PMOP which
points to the regex currently being executed. That regex has all its
current fields that are associated with captures (such as subbeg)
temporarily saved and overwritten with the current partial match results.
Similarly, $_ is temporarily aliased to the current match string, and any
old pos() position is saved. This saving was formerly done to the various
PL_reg* vars.
When the regex has finished executing (or if the code block croaks),
its fields are restored to the original values. Since this can happen in a
croak, it may be done using SAVEDESTRUCTOR_X() on the save stack. This
precludes just moving the PL_reg* vars into the regmatch_info struct,
since that is just allocated as a local var in regexec_flags(), and would
have already been abandoned and possibly overwritten after the croak and
longjmp, but before the SAVEDESTRUCTOR_X() action is taken.
So instead we put all the vars into new struct, and malloc that on entry to
the regex engine when we know we need to copy the various fields.
We save a pointer to that in the regmatch_info struct, as well as passing
it to SAVEDESTRUCTOR_X(). The destructor may get called up to twice in the
non-croak case: first it's called explicitly at the end of regexec_flags(),
which restores subbeg etc; then again from the savestack, which just
free()s the struct. In the croak case, it's called just once, and does
both the restoring and the freeing.
The vars / PL_reg_state fields this commit eliminates are:
re_state_eval_setup_done
PL_reg_oldsaved
PL_reg_oldsavedlen
PL_reg_oldsavedoffset
PL_reg_oldsavedcoffset
PL_reg_magic
PL_reg_oldpos
PL_nrs
PL_reg_oldcurpm
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 101 |
1 files changed, 66 insertions, 35 deletions
@@ -651,6 +651,7 @@ Perl_re_intuit_start(pTHX_ goto fail; } + reginfo->eval_state = NULL; reginfo->strbeg = strbeg; reginfo->strend = strend; reginfo->is_utf8_pat = is_utf8_pat; @@ -2091,6 +2092,8 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, } multiline = prog->extflags & RXf_PMf_MULTILINE; + + reginfo->eval_state = NULL; reginfo->prog = rx; /* Yes, sorry that this is confusing. */ reginfo->intuit = 0; @@ -2115,7 +2118,6 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, } RX_MATCH_TAINTED_off(rx); - PL_reg_state.re_state_eval_setup_done = FALSE; PL_reg_maxiter = 0; reginfo->is_utf8_pat = cBOOL(RX_UTF8(rx)); @@ -2602,8 +2604,11 @@ got_it: ); Safefree(swap); - if (PL_reg_state.re_state_eval_setup_done) - S_restore_eval_state(aTHX_ prog); + if (reginfo->eval_state) { + reginfo->eval_state->direct = TRUE; + S_restore_eval_state(aTHX_ reginfo->eval_state); + } + if (RXp_PAREN_NAMES(prog)) (void)hv_iterinit(RXp_PAREN_NAMES(prog)); @@ -2731,8 +2736,12 @@ got_it: phooey: DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch failed%s\n", PL_colors[4], PL_colors[5])); - if (PL_reg_state.re_state_eval_setup_done) - S_restore_eval_state(aTHX_ prog); + + if (reginfo->eval_state) { + reginfo->eval_state->direct = TRUE; + S_restore_eval_state(aTHX_ reginfo->eval_state); + } + if (swap) { /* we failed :-( roll it back */ DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, @@ -2751,7 +2760,7 @@ phooey: /* Set which rex is pointed to by PL_reg_state, handling ref counting. * Do inc before dec, in case old and new rex are the same */ #define SET_reg_curpm(Re2) \ - if (PL_reg_state.re_state_eval_setup_done) { \ + if (reginfo->eval_state) { \ (void)ReREFCNT_inc(Re2); \ ReREFCNT_dec(PM_GETRE(PL_reg_curpm)); \ PM_SETRE((PL_reg_curpm), (Re2)); \ @@ -2776,8 +2785,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp) reginfo->cutpoint=NULL; - if ((prog->extflags & RXf_EVAL_SEEN) - && !PL_reg_state.re_state_eval_setup_done) + if ((prog->extflags & RXf_EVAL_SEEN) && !reginfo->eval_state) S_setup_eval_state(aTHX_ reginfo); #ifdef DEBUGGING @@ -4914,8 +4922,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) DEBUG_STATE_r( PerlIO_printf(Perl_debug_log, " re EVAL PL_op=0x%"UVxf"\n", PTR2UV(nop)) ); - rex->offs[0].end = PL_reg_magic->mg_len = - locinput - reginfo->strbeg; + rex->offs[0].end = locinput - reginfo->strbeg; + if (reginfo->eval_state->pos_magic) + reginfo->eval_state->pos_magic->mg_len + = locinput - reginfo->strbeg; if (sv_yes_mark) { SV *sv_mrk = get_sv("REGMARK", 1); @@ -6500,7 +6510,7 @@ yes: DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, "%sMatch successful!%s\n", PL_colors[4], PL_colors[5])); - if (PL_reg_state.re_state_eval_setup_done) { + if (reginfo->eval_state) { /* each successfully executed (?{...}) block does the equivalent of * local $^R = do {...} * When popping the save stack, all these locals would be undone; @@ -7481,8 +7491,16 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo) { MAGIC *mg; regexp *const rex = ReANY(reginfo->prog); + regmatch_eval_state *eval_state; + + Newx(eval_state, 1, regmatch_eval_state); + assert(!reginfo->eval_state); + reginfo->eval_state = eval_state; + + eval_state->restored = FALSE; + eval_state->direct = FALSE; + eval_state->rex = rex; - PL_reg_state.re_state_eval_setup_done = TRUE; if (reginfo->sv) { /* Make $_ available to executed code. */ if (reginfo->sv != DEFSV) { @@ -7501,9 +7519,12 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo) &PL_vtbl_mglob, NULL, 0); mg->mg_len = -1; } - PL_reg_magic = mg; - PL_reg_oldpos = mg->mg_len; + eval_state->pos_magic = mg; + eval_state->pos = mg->mg_len; } + else + eval_state->pos_magic = NULL; + if (!PL_reg_curpm) { Newxz(PL_reg_curpm, 1, PMOP); #ifdef USE_ITHREADS @@ -7518,55 +7539,65 @@ S_setup_eval_state(pTHX_ regmatch_info *const reginfo) #endif } SET_reg_curpm(reginfo->prog); - PL_reg_oldcurpm = PL_curpm; + eval_state->curpm = PL_curpm; PL_curpm = PL_reg_curpm; if (RXp_MATCH_COPIED(rex)) { /* Here is a serious problem: we cannot rewrite subbeg, since it may be needed if this match fails. Thus $` inside (?{}) could fail... */ - PL_reg_oldsaved = rex->subbeg; - PL_reg_oldsavedlen = rex->sublen; - PL_reg_oldsavedoffset = rex->suboffset; - PL_reg_oldsavedcoffset = rex->suboffset; + eval_state->subbeg = rex->subbeg; + eval_state->sublen = rex->sublen; + eval_state->suboffset = rex->suboffset; + eval_state->subcoffset = rex->suboffset; #ifdef PERL_ANY_COW - PL_nrs = rex->saved_copy; + eval_state->saved_copy = rex->saved_copy; #endif RXp_MATCH_COPIED_off(rex); } else - PL_reg_oldsaved = NULL; + eval_state->subbeg = NULL; rex->subbeg = (char *)reginfo->strbeg; rex->suboffset = 0; rex->subcoffset = 0; rex->sublen = reginfo->strend - reginfo->strbeg; - SAVEDESTRUCTOR_X(S_restore_eval_state, rex); + SAVEDESTRUCTOR_X(S_restore_eval_state, eval_state); } /* undo the effects of S_setup_eval_state() - can either be called - * directly, or via a destructor */ + * directly, or via a destructor. If we get called directly, we'll still + * get called again later from the destructor */ static void S_restore_eval_state(pTHX_ void *arg) { dVAR; - regexp * const rex = (regexp *)arg; - if (PL_reg_state.re_state_eval_setup_done) { - if (PL_reg_oldsaved) { - rex->subbeg = PL_reg_oldsaved; - rex->sublen = PL_reg_oldsavedlen; - rex->suboffset = PL_reg_oldsavedoffset; - rex->subcoffset = PL_reg_oldsavedcoffset; + regmatch_eval_state * const eval_state = (regmatch_eval_state *)arg; + regexp * const rex = eval_state->rex; + + if (!eval_state->restored) { + if (eval_state->subbeg) { + rex->subbeg = eval_state->subbeg; + rex->sublen = eval_state->sublen; + rex->suboffset = eval_state->suboffset; + rex->subcoffset = eval_state->subcoffset; #ifdef PERL_ANY_COW - rex->saved_copy = PL_nrs; + rex->saved_copy = eval_state->saved_copy; #endif RXp_MATCH_COPIED_on(rex); } - PL_reg_magic->mg_len = PL_reg_oldpos; - PL_reg_state.re_state_eval_setup_done = FALSE; - PL_curpm = PL_reg_oldcurpm; - } + if (eval_state->pos_magic) + eval_state->pos_magic->mg_len = eval_state->pos; + PL_curpm = eval_state->curpm; + eval_state->restored = TRUE; + } + if (eval_state->direct) + eval_state->direct = FALSE; + else + /* we're being called from a destructor rather than directly */ + Safefree(eval_state); } + STATIC void S_to_utf8_substr(pTHX_ regexp *prog) { |