diff options
author | David Mitchell <davem@iabyn.com> | 2012-05-15 21:01:39 +0100 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2012-06-13 13:32:53 +0100 |
commit | b3fd53f35858a4ca5c7226ba0fa5a9e864378c38 (patch) | |
tree | 760a9af83086fb9a5c529c6a50974adbb1d1bf11 | |
parent | b93070ed2e35b0430327cc866a4fcf4042014513 (diff) | |
download | perl-b3fd53f35858a4ca5c7226ba0fa5a9e864378c38.tar.gz |
eliminate PL_reg_start_tmp, PL_reg_start_tmpl
PL_reg_start_tmp is a global array of temporary parentheses start
positions. An element is set when a '(' is first encountered,
while when a ')' is seen, the per-regex offs array is updated with the
start and end position: the end derived from the position where the ')'
was encountered, and the start position derived from PL_reg_start_tmp[n].
This allows us to differentiate between pending and fully-processed
captures.
Change it so that the tmp start value becomes a third field in the offs
array (.start_tmp), along with the existing .start and .end fields. This
makes the value now per regex rather than global. Although it uses a bit
more memory (the start_tmp values aren't needed after the match has
completed), it simplifies the code, and will make it easier to make a
(??{}) switch to the new regex without having to dump everything on the
save stack.
-rw-r--r-- | perl.c | 3 | ||||
-rw-r--r-- | regcomp.c | 2 | ||||
-rw-r--r-- | regexec.c | 29 | ||||
-rw-r--r-- | regexp.h | 13 | ||||
-rw-r--r-- | scope.c | 3 | ||||
-rw-r--r-- | sv.c | 13 |
6 files changed, 16 insertions, 47 deletions
@@ -1236,9 +1236,6 @@ perl_destruct(pTHXx) Safefree(PL_origfilename); PL_origfilename = NULL; - Safefree(PL_reg_start_tmp); - PL_reg_start_tmp = (char**)NULL; - PL_reg_start_tmpl = 0; Safefree(PL_reg_curpm); Safefree(PL_reg_poscache); free_tied_hv_pool(); @@ -13901,8 +13901,6 @@ Perl_save_re_context(pTHX) Copy(&PL_reg_state, state, 1, struct re_save_state); - PL_reg_start_tmp = 0; - PL_reg_start_tmpl = 0; PL_reg_oldsaved = NULL; PL_reg_oldsavedlen = 0; PL_reg_maxiter = 0; @@ -366,12 +366,12 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor) /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */ SSPUSHINT(rex->offs[p].end); SSPUSHINT(rex->offs[p].start); - SSPUSHPTR(PL_reg_start_tmp[p]); + SSPUSHPTR(rex->offs[p].start_tmp); SSPUSHINT(p); DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n", (UV)p, (IV)rex->offs[p].start, - (IV)(PL_reg_start_tmp[p] - PL_bostr), + (IV)(rex->offs[p].start_tmp - PL_bostr), (IV)rex->offs[p].end )); } @@ -425,7 +425,7 @@ S_regcppop(pTHX_ regexp *rex) for ( ; i > 0; i -= REGCP_PAREN_ELEMS) { I32 tmps; U32 paren = (U32)SSPOPINT; - PL_reg_start_tmp[paren] = (char *) SSPOPPTR; + rex->offs[paren].start_tmp = (char *) SSPOPPTR; rex->offs[paren].start = SSPOPINT; tmps = SSPOPINT; if (paren <= rex->lastparen) @@ -434,7 +434,7 @@ S_regcppop(pTHX_ regexp *rex) PerlIO_printf(Perl_debug_log, " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n", (UV)paren, (IV)rex->offs[paren].start, - (IV)(PL_reg_start_tmp[paren] - PL_bostr), + (IV)(rex->offs[paren].start_tmp - PL_bostr), (IV)rex->offs[paren].end, (paren > rex->lastparen ? "(no)" : "")); ); @@ -2645,13 +2645,6 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startpos) prog->lastparen = 0; prog->lastcloseparen = 0; PL_regsize = 0; - if (PL_reg_start_tmpl <= prog->nparens) { - PL_reg_start_tmpl = prog->nparens*3/2 + 3; - if(PL_reg_start_tmp) - Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*); - else - Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*); - } /* XXXX What this code is doing here?!!! There should be no need to do this again and again, prog->lastparen should take care of @@ -4462,14 +4455,6 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) ); startpoint = rei->program + 1; ST.close_paren = 0; /* only used for GOSUB */ - /* borrowed from regtry */ - if (PL_reg_start_tmpl <= re->nparens) { - PL_reg_start_tmpl = re->nparens*3/2 + 3; - if(PL_reg_start_tmp) - Renew(PL_reg_start_tmp, PL_reg_start_tmpl, char*); - else - Newx(PL_reg_start_tmp, PL_reg_start_tmpl, char*); - } eval_recurse_doit: /* Share code with GOSUB below this line */ /* run the pattern returned from (??{...}) */ @@ -4552,14 +4537,14 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) case OPEN: n = ARG(scan); /* which paren pair */ - PL_reg_start_tmp[n] = locinput; + rex->offs[n].start_tmp = locinput; if (n > PL_regsize) PL_regsize = n; lastopen = n; break; case CLOSE: n = ARG(scan); /* which paren pair */ - rex->offs[n].start = PL_reg_start_tmp[n] - PL_bostr; + rex->offs[n].start = rex->offs[n].start_tmp - PL_bostr; rex->offs[n].end = locinput - PL_bostr; /*if (n > PL_regsize) PL_regsize = n;*/ @@ -4581,7 +4566,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) n = ARG(cursor); if ( n <= lastopen ) { rex->offs[n].start - = PL_reg_start_tmp[n] - PL_bostr; + = rex->offs[n].start_tmp - PL_bostr; rex->offs[n].end = locinput - PL_bostr; /*if (n > PL_regsize) PL_regsize = n;*/ @@ -50,9 +50,18 @@ struct reg_substr_data { #define SV_SAVED_COPY #endif +/* offsets within a string of a particular /(.)/ capture */ + typedef struct regexp_paren_pair { I32 start; I32 end; + /* 'start_tmp' records a new opening position before the matching end + * has been found, so that the old start and end values are still + * valid, e.g. + * "abc" =~ /(.(?{print "[$1]"}))+/ + *outputs [][a][b] + * This field is not part of the API. */ + char *start_tmp; } regexp_paren_pair; #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_UTF8_C) @@ -738,8 +747,6 @@ typedef struct regmatch_slab { #define PL_bostr PL_reg_state.re_state_bostr #define PL_reginput PL_reg_state.re_state_reginput #define PL_regeol PL_reg_state.re_state_regeol -#define PL_reg_start_tmp PL_reg_state.re_state_reg_start_tmp -#define PL_reg_start_tmpl PL_reg_state.re_state_reg_start_tmpl #define PL_reg_match_utf8 PL_reg_state.re_state_reg_match_utf8 #define PL_reg_magic PL_reg_state.re_state_reg_magic #define PL_reg_oldpos PL_reg_state.re_state_reg_oldpos @@ -757,14 +764,12 @@ typedef struct regmatch_slab { struct re_save_state { U32 re_state_reg_flags; /* from regexec.c */ - U32 re_state_reg_start_tmpl; /* from regexec.c */ bool re_state_eval_setup_done; /* from regexec.c */ bool re_state_reg_match_utf8; /* from regexec.c */ bool re_reparsing; /* runtime (?{}) fed back into parser */ char *re_state_bostr; char *re_state_reginput; /* String-input pointer. */ char *re_state_regeol; /* End of input, for $ check. */ - char **re_state_reg_start_tmp; /* from regexec.c */ MAGIC *re_state_reg_magic; /* from regexec.c */ PMOP *re_state_reg_oldcurpm; /* from regexec.c */ PMOP *re_state_reg_curpm; /* from regexec.c */ @@ -1136,9 +1136,6 @@ Perl_leave_scope(pTHX_ I32 base) - SAVESTACK_ALLOC_FOR_RE_SAVE_STATE); PL_savestack_ix -= SAVESTACK_ALLOC_FOR_RE_SAVE_STATE; - if (PL_reg_start_tmp != state->re_state_reg_start_tmp) { - Safefree(PL_reg_start_tmp); - } if (PL_reg_poscache != state->re_state_reg_poscache) { Safefree(PL_reg_poscache); } @@ -12708,19 +12708,6 @@ Perl_ss_dup(pTHX_ PerlInterpreter *proto_perl, CLONE_PARAMS* param) = pv_dup(old_state->re_state_reginput); new_state->re_state_regeol = pv_dup(old_state->re_state_regeol); - /* XXX This just has to be broken. The old save_re_context - code did SAVEGENERICPV(PL_reg_start_tmp); - PL_reg_start_tmp is char **. - Look above to what the dup code does for - SAVEt_GENERIC_PVREF - It can never have worked. - So this is merely a faithful copy of the exiting bug: */ - new_state->re_state_reg_start_tmp - = (char **) pv_dup((char *) - old_state->re_state_reg_start_tmp); - /* I assume that it only ever "worked" because no-one called - (pseudo)fork while the regexp engine had re-entered itself. - */ #ifdef PERL_OLD_COPY_ON_WRITE new_state->re_state_nrs = sv_dup(old_state->re_state_nrs, param); |