diff options
author | Father Chrysostomos <sprout@cpan.org> | 2013-07-25 00:41:07 -0700 |
---|---|---|
committer | Father Chrysostomos <sprout@cpan.org> | 2013-08-25 12:23:59 -0700 |
commit | 99a90e5967b33d68a38c309edf24275f1c8a979f (patch) | |
tree | cfc8416c25f4661ec9901d4103e77a5f636a2d90 | |
parent | 389ecb564541f5a336b531db204970925ed27790 (diff) | |
download | perl-99a90e5967b33d68a38c309edf24275f1c8a979f.tar.gz |
[perl #116907] Allow //g matching past 2**31 threshold
Change the internal fields for storing positions so that //g in scalar
context can move past the 2**31 character threshold. Before this com-
mit, the numbers would wrap, resulting in assertion failures.
The changes in this commit are only enough to get the added test pass-
ing. Stay tuned for more.
-rw-r--r-- | embed.fnc | 2 | ||||
-rw-r--r-- | pp_hot.c | 2 | ||||
-rw-r--r-- | proto.h | 2 | ||||
-rw-r--r-- | regcomp.c | 4 | ||||
-rw-r--r-- | regexec.c | 16 | ||||
-rw-r--r-- | regexp.h | 7 | ||||
-rw-r--r-- | t/bigmem/regexp.t | 12 |
7 files changed, 27 insertions, 18 deletions
@@ -2091,7 +2091,7 @@ Es |U8 |regtail_study |NN struct RExC_state_t *pRExC_state \ #if defined(PERL_IN_REGEXEC_C) ERs |bool |isFOO_lc |const U8 classnum|const U8 character ERs |bool |isFOO_utf8_lc |const U8 classnum|NN const U8* character -ERs |I32 |regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog +ERs |SSize_t|regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog ERs |I32 |regrepeat |NN regexp *prog|NN char **startposp \ |NN const regnode *p \ |NN regmatch_info *const reginfo \ @@ -1326,7 +1326,7 @@ PP(pp_match) PMOP *dynpm = pm; const char *s; const char *strend; - I32 curpos = 0; /* initial pos() or current $+[0] */ + SSize_t curpos = 0; /* initial pos() or current $+[0] */ I32 global; U8 r_flags = 0; const char *truebase; /* Start of string */ @@ -7000,7 +7000,7 @@ STATIC bool S_reginclass(pTHX_ regexp * const prog, const regnode * const n, con #define PERL_ARGS_ASSERT_REGINCLASS \ assert(n); assert(p) -STATIC I32 S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) +STATIC SSize_t S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) __attribute__warn_unused_result__ __attribute__nonnull__(pTHX_1) __attribute__nonnull__(pTHX_2) @@ -6729,7 +6729,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, struct regexp *const rx = ReANY(r); char *s = NULL; I32 i = 0; - I32 s1, t1; + SSize_t s1, t1; I32 n = paren; PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH; @@ -6787,7 +6787,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren, } assert(s >= rx->subbeg); - assert(rx->sublen >= (s - rx->subbeg) + i ); + assert((STRLEN)rx->sublen >= (STRLEN)((s - rx->subbeg) + i) ); if (i >= 0) { #if NO_TAINT_SUPPORT sv_setpvn(sv, s, i); @@ -296,8 +296,8 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen) ); for (p = parenfloor+1; p <= (I32)maxopenparen; p++) { /* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */ - SSPUSHINT(rex->offs[p].end); - SSPUSHINT(rex->offs[p].start); + SSPUSHIV(rex->offs[p].end); + SSPUSHIV(rex->offs[p].start); SSPUSHINT(rex->offs[p].start_tmp); DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, " \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n", @@ -371,8 +371,8 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p) for ( ; i > 0; i -= REGCP_PAREN_ELEMS) { I32 tmps; rex->offs[paren].start_tmp = SSPOPINT; - rex->offs[paren].start = SSPOPINT; - tmps = SSPOPINT; + rex->offs[paren].start = SSPOPIV; + tmps = SSPOPIV; if (paren <= rex->lastparen) rex->offs[paren].end = tmps; DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log, @@ -2097,8 +2097,8 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx, } else #endif { - I32 min = 0; - I32 max = strend - strbeg; + SSize_t min = 0; + SSize_t max = strend - strbeg; I32 sublen; if ( (flags & REXEC_COPY_SKIP_POST) @@ -2938,7 +2938,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp) CHECKPOINT lastcp; REGEXP *const rx = reginfo->prog; regexp *const prog = ReANY(rx); - I32 result; + SSize_t result; RXi_GET_DECL(prog,progi); GET_RE_DEBUG_FLAGS_DECL; @@ -3583,7 +3583,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p, } /* returns -1 on failure, $+[0] on success */ -STATIC I32 +STATIC SSize_t S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) { #if PERL_VERSION < 9 && !defined(PERL_CORE) @@ -55,8 +55,8 @@ struct reg_substr_data { /* offsets within a string of a particular /(.)/ capture */ typedef struct regexp_paren_pair { - I32 start; - I32 end; + SSize_t start; + SSize_t end; /* 'start_tmp' records a new opening position before the matching end * has been found, so that the old start and end values are still * valid, e.g. @@ -503,7 +503,8 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp) #define RX_SAVED_COPY(prog) (ReANY(prog)->saved_copy) /* last match was zero-length */ #define RX_ZERO_LEN(prog) \ - (RX_OFFS(prog)[0].start + RX_GOFS(prog) == (UV)RX_OFFS(prog)[0].end) + (RX_OFFS(prog)[0].start + (SSize_t)RX_GOFS(prog) \ + == RX_OFFS(prog)[0].end) #endif /* PLUGGABLE_RE_EXTENSION */ diff --git a/t/bigmem/regexp.t b/t/bigmem/regexp.t index ef029fbd9b..ef74e59ab7 100644 --- a/t/bigmem/regexp.t +++ b/t/bigmem/regexp.t @@ -12,11 +12,19 @@ $ENV{PERL_TEST_MEMORY} >= 2 $Config{ptrsize} >= 8 or skip_all("Need 64-bit pointers for this test"); -plan(2); +plan(3); # [perl #116907] # ${\2} to defeat constant folding, which in this case actually slows # things down -my $x=" "x(${\2}**31); +my $x=" "x(${\2}**31) . "abcdefg"; ok $x =~ /./, 'match against long string succeeded'; is "$-[0]-$+[0]", '0-1', '@-/@+ after match against long string'; + +pos $x = 2**31-1; +my $result; +for(1..5) { + $x =~ /./g; + $result .= "$&-"; +} +is $result," -a-b-c-d-", 'scalar //g hopping past the 2**31 threshold'; |