summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorFather Chrysostomos <sprout@cpan.org>2013-07-25 00:41:07 -0700
committerFather Chrysostomos <sprout@cpan.org>2013-08-25 12:23:59 -0700
commit99a90e5967b33d68a38c309edf24275f1c8a979f (patch)
treecfc8416c25f4661ec9901d4103e77a5f636a2d90
parent389ecb564541f5a336b531db204970925ed27790 (diff)
downloadperl-99a90e5967b33d68a38c309edf24275f1c8a979f.tar.gz
[perl #116907] Allow //g matching past 2**31 threshold
Change the internal fields for storing positions so that //g in scalar context can move past the 2**31 character threshold. Before this com- mit, the numbers would wrap, resulting in assertion failures. The changes in this commit are only enough to get the added test pass- ing. Stay tuned for more.
-rw-r--r--embed.fnc2
-rw-r--r--pp_hot.c2
-rw-r--r--proto.h2
-rw-r--r--regcomp.c4
-rw-r--r--regexec.c16
-rw-r--r--regexp.h7
-rw-r--r--t/bigmem/regexp.t12
7 files changed, 27 insertions, 18 deletions
diff --git a/embed.fnc b/embed.fnc
index 73951d931c..39b3bb4888 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2091,7 +2091,7 @@ Es |U8 |regtail_study |NN struct RExC_state_t *pRExC_state \
#if defined(PERL_IN_REGEXEC_C)
ERs |bool |isFOO_lc |const U8 classnum|const U8 character
ERs |bool |isFOO_utf8_lc |const U8 classnum|NN const U8* character
-ERs |I32 |regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
+ERs |SSize_t|regmatch |NN regmatch_info *reginfo|NN char *startpos|NN regnode *prog
ERs |I32 |regrepeat |NN regexp *prog|NN char **startposp \
|NN const regnode *p \
|NN regmatch_info *const reginfo \
diff --git a/pp_hot.c b/pp_hot.c
index ca285e2907..e07b1b9f1c 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1326,7 +1326,7 @@ PP(pp_match)
PMOP *dynpm = pm;
const char *s;
const char *strend;
- I32 curpos = 0; /* initial pos() or current $+[0] */
+ SSize_t curpos = 0; /* initial pos() or current $+[0] */
I32 global;
U8 r_flags = 0;
const char *truebase; /* Start of string */
diff --git a/proto.h b/proto.h
index 7326fb805e..d7c9f32fb7 100644
--- a/proto.h
+++ b/proto.h
@@ -7000,7 +7000,7 @@ STATIC bool S_reginclass(pTHX_ regexp * const prog, const regnode * const n, con
#define PERL_ARGS_ASSERT_REGINCLASS \
assert(n); assert(p)
-STATIC I32 S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
+STATIC SSize_t S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2)
diff --git a/regcomp.c b/regcomp.c
index 69bb66d27a..125a9a2d14 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6729,7 +6729,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
struct regexp *const rx = ReANY(r);
char *s = NULL;
I32 i = 0;
- I32 s1, t1;
+ SSize_t s1, t1;
I32 n = paren;
PERL_ARGS_ASSERT_REG_NUMBERED_BUFF_FETCH;
@@ -6787,7 +6787,7 @@ Perl_reg_numbered_buff_fetch(pTHX_ REGEXP * const r, const I32 paren,
}
assert(s >= rx->subbeg);
- assert(rx->sublen >= (s - rx->subbeg) + i );
+ assert((STRLEN)rx->sublen >= (STRLEN)((s - rx->subbeg) + i) );
if (i >= 0) {
#if NO_TAINT_SUPPORT
sv_setpvn(sv, s, i);
diff --git a/regexec.c b/regexec.c
index a1d61a5d9c..29991b5820 100644
--- a/regexec.c
+++ b/regexec.c
@@ -296,8 +296,8 @@ S_regcppush(pTHX_ const regexp *rex, I32 parenfloor, U32 maxopenparen)
);
for (p = parenfloor+1; p <= (I32)maxopenparen; p++) {
/* REGCP_PARENS_ELEMS are pushed per pairs of parentheses. */
- SSPUSHINT(rex->offs[p].end);
- SSPUSHINT(rex->offs[p].start);
+ SSPUSHIV(rex->offs[p].end);
+ SSPUSHIV(rex->offs[p].start);
SSPUSHINT(rex->offs[p].start_tmp);
DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log,
" \\%"UVuf": %"IVdf"(%"IVdf")..%"IVdf"\n",
@@ -371,8 +371,8 @@ S_regcppop(pTHX_ regexp *rex, U32 *maxopenparen_p)
for ( ; i > 0; i -= REGCP_PAREN_ELEMS) {
I32 tmps;
rex->offs[paren].start_tmp = SSPOPINT;
- rex->offs[paren].start = SSPOPINT;
- tmps = SSPOPINT;
+ rex->offs[paren].start = SSPOPIV;
+ tmps = SSPOPIV;
if (paren <= rex->lastparen)
rex->offs[paren].end = tmps;
DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log,
@@ -2097,8 +2097,8 @@ S_reg_set_capture_string(pTHX_ REGEXP * const rx,
} else
#endif
{
- I32 min = 0;
- I32 max = strend - strbeg;
+ SSize_t min = 0;
+ SSize_t max = strend - strbeg;
I32 sublen;
if ( (flags & REXEC_COPY_SKIP_POST)
@@ -2938,7 +2938,7 @@ S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
CHECKPOINT lastcp;
REGEXP *const rx = reginfo->prog;
regexp *const prog = ReANY(rx);
- I32 result;
+ SSize_t result;
RXi_GET_DECL(prog,progi);
GET_RE_DEBUG_FLAGS_DECL;
@@ -3583,7 +3583,7 @@ S_setup_EXACTISH_ST_c1_c2(pTHX_ const regnode * const text_node, int *c1p,
}
/* returns -1 on failure, $+[0] on success */
-STATIC I32
+STATIC SSize_t
S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
{
#if PERL_VERSION < 9 && !defined(PERL_CORE)
diff --git a/regexp.h b/regexp.h
index fd6425f403..c8d03e3758 100644
--- a/regexp.h
+++ b/regexp.h
@@ -55,8 +55,8 @@ struct reg_substr_data {
/* offsets within a string of a particular /(.)/ capture */
typedef struct regexp_paren_pair {
- I32 start;
- I32 end;
+ SSize_t start;
+ SSize_t end;
/* 'start_tmp' records a new opening position before the matching end
* has been found, so that the old start and end values are still
* valid, e.g.
@@ -503,7 +503,8 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RX_SAVED_COPY(prog) (ReANY(prog)->saved_copy)
/* last match was zero-length */
#define RX_ZERO_LEN(prog) \
- (RX_OFFS(prog)[0].start + RX_GOFS(prog) == (UV)RX_OFFS(prog)[0].end)
+ (RX_OFFS(prog)[0].start + (SSize_t)RX_GOFS(prog) \
+ == RX_OFFS(prog)[0].end)
#endif /* PLUGGABLE_RE_EXTENSION */
diff --git a/t/bigmem/regexp.t b/t/bigmem/regexp.t
index ef029fbd9b..ef74e59ab7 100644
--- a/t/bigmem/regexp.t
+++ b/t/bigmem/regexp.t
@@ -12,11 +12,19 @@ $ENV{PERL_TEST_MEMORY} >= 2
$Config{ptrsize} >= 8
or skip_all("Need 64-bit pointers for this test");
-plan(2);
+plan(3);
# [perl #116907]
# ${\2} to defeat constant folding, which in this case actually slows
# things down
-my $x=" "x(${\2}**31);
+my $x=" "x(${\2}**31) . "abcdefg";
ok $x =~ /./, 'match against long string succeeded';
is "$-[0]-$+[0]", '0-1', '@-/@+ after match against long string';
+
+pos $x = 2**31-1;
+my $result;
+for(1..5) {
+ $x =~ /./g;
+ $result .= "$&-";
+}
+is $result," -a-b-c-d-", 'scalar //g hopping past the 2**31 threshold';