summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2013-06-23 13:30:59 +0100
committerDavid Mitchell <davem@iabyn.com>2013-07-28 10:33:36 +0100
commit03c83e260b8bf6c564b49840536ccd6f47a794ac (patch)
tree330552035f00d63622becfc5766a0c2cac9839bd
parent8fe3c67a9d33417c317107b8f5a56ca88d4153a9 (diff)
downloadperl-03c83e260b8bf6c564b49840536ccd6f47a794ac.tar.gz
regexec: handle \G ourself, rather than in callers
Normally a /g match starts its processing at the previous pos() (or at char 0 if pos is not set); however in the case of something like /abc\G/ we actually need to start 3 characters before pos. This has been handled by the *callers* of regexec() subtracting prog->gofs from the stringarg arg before calling it, or by setting stringarg to strbeg for floating, such as /\w+\G/. This is clearly wrong: the callers of regexec() shouldn't need to worry about the details of getting \G right: move this code into regexec() itself. (Note that although this commit passes all tests, it quite possibly isn't logically correct. It will get fixed up further during the next few commits)
-rw-r--r--pp_ctl.c9
-rw-r--r--pp_hot.c27
-rw-r--r--regexec.c42
-rw-r--r--regexp.h3
4 files changed, 53 insertions, 28 deletions
diff --git a/pp_ctl.c b/pp_ctl.c
index 5c6999b1c3..87eadd2aeb 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -226,14 +226,9 @@ PP(pp_substcont)
if (SvTAINTED(TOPs))
cx->sb_rxtainted |= SUBST_TAINT_REPL;
sv_catsv_nomg(dstr, POPs);
- /* XXX: the RX_GOFS stuff is to adjust for positive offsets of
- * \G for instance s/(.)\G//g with positive pos(). See #69056 and #114884
- * This whole \G thing makes a *lot* of things more difficult than they
- * should be. - Yves */
- /* Are we done */
if (CxONCE(cx) || s < orig ||
- !CALLREGEXEC(rx, s - RX_GOFS(rx), cx->sb_strend, orig,
- (s == m) + RX_GOFS(rx), cx->sb_targ, NULL,
+ !CALLREGEXEC(rx, s, cx->sb_strend, orig,
+ (s == m), cx->sb_targ, NULL,
(REXEC_IGNOREPOS|REXEC_NOT_FIRST)))
{
SV *targ = cx->sb_targ;
diff --git a/pp_hot.c b/pp_hot.c
index bf4aca7d03..ee740c21fb 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1330,9 +1330,7 @@ PP(pp_match)
bool rxtainted;
const I32 gimme = GIMME;
STRLEN len;
- I32 minmatch = 0;
const I32 oldsave = PL_savestack_ix;
- I32 update_minmatch = 1;
I32 had_zerolen = 0;
if (PL_op->op_flags & OPf_STACKED)
@@ -1396,8 +1394,11 @@ PP(pp_match)
}
else if (!(RX_EXTFLAGS(rx) & RXf_GPOS_FLOAT))
curpos = mg->mg_len;
- minmatch = (mg->mg_flags & MGf_MINMATCH) ? RX_GOFS(rx) + 1 : 0;
- update_minmatch = 0;
+ else
+ curpos = mg->mg_len;
+ /* last time pos() was set, it was zero-length match */
+ if (mg->mg_flags & MGf_MINMATCH)
+ had_zerolen = 1;
}
}
#ifdef PERL_SAWAMPERSAND
@@ -1420,17 +1421,11 @@ PP(pp_match)
play_it_again:
if (global) {
- s = truebase + curpos - RX_GOFS(rx);
- if ((s + RX_MINLEN(rx)) > strend || s < truebase) {
- DEBUG_r(PerlIO_printf(Perl_debug_log, "Regex match can't succeed, so not even tried\n"));
- goto nope;
- }
- if (update_minmatch++)
- minmatch = had_zerolen;
+ s = truebase + curpos;
}
if (!CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase,
- minmatch, TARG, NULL, r_flags))
+ had_zerolen, TARG, NULL, r_flags))
goto nope;
PL_curpm = pm;
@@ -1453,9 +1448,10 @@ PP(pp_match)
if (!mg) {
mg = sv_magicext_mglob(TARG);
}
+ assert(RX_OFFS(rx)[0].start != -1); /* XXX get rid of next line? */
if (RX_OFFS(rx)[0].start != -1) {
mg->mg_len = RX_OFFS(rx)[0].end;
- if (RX_OFFS(rx)[0].start + RX_GOFS(rx) == (UV)RX_OFFS(rx)[0].end)
+ if (RX_ZERO_LEN(rx))
mg->mg_flags |= MGf_MINMATCH;
else
mg->mg_flags &= ~MGf_MINMATCH;
@@ -1493,11 +1489,8 @@ PP(pp_match)
}
}
if (global) {
- assert(RX_OFFS(rx)[0].start != -1);
curpos = (UV)RX_OFFS(rx)[0].end;
- had_zerolen = (RX_OFFS(rx)[0].start != -1
- && (RX_OFFS(rx)[0].start + RX_GOFS(rx)
- == (UV)curpos));
+ had_zerolen = RX_ZERO_LEN(rx);
PUTBACK; /* EVAL blocks may use stack */
r_flags |= REXEC_IGNOREPOS | REXEC_NOT_FIRST;
goto play_it_again;
diff --git a/regexec.c b/regexec.c
index 0b81bab23f..b05b1521a0 100644
--- a/regexec.c
+++ b/regexec.c
@@ -2211,7 +2211,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
struct regexp *const prog = ReANY(rx);
char *s;
regnode *c;
- char *startpos = stringarg;
+ char *startpos;
I32 minlen; /* must match at least this many chars */
I32 dontbother = 0; /* how many characters not to try at end */
I32 end_shift = 0; /* Same for the end. */ /* CC */
@@ -2230,16 +2230,51 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
PERL_UNUSED_ARG(data);
/* Be paranoid... */
- if (prog == NULL || startpos == NULL) {
+ if (prog == NULL || stringarg == NULL) {
Perl_croak(aTHX_ "NULL regexp parameter");
return 0;
}
DEBUG_EXECUTE_r(
- debug_start_match(rx, utf8_target, startpos, strend,
+ debug_start_match(rx, utf8_target, stringarg, strend,
"Matching");
);
+ if (prog->extflags & RXf_GPOS_SEEN) {
+ /* in the presence of \G, we may need to start looking earlier in
+ * the string than the suggested start point of stringarg:
+ * if gofs->prog is set, then that's a known, fixed minimum
+ * offset, such as
+ * /..\G/: gofs = 2
+ * /ab|c\G/: gofs = 1
+ * or if the minimum offset isn't known, then we have to go back
+ * to the start of the string, e.g. /w+\G/
+ */
+ if (prog->gofs) {
+ if (stringarg - prog->gofs < strbeg) {
+ minend += (stringarg - strbeg);
+ stringarg = strbeg;
+ }
+ else {
+ stringarg -= prog->gofs;
+ minend += prog->gofs;
+ }
+ }
+ else if (prog->extflags & RXf_GPOS_FLOAT) {
+ minend += (stringarg - strbeg);
+ stringarg = strbeg;
+ }
+ }
+
+ minlen = prog->minlen;
+ if ((stringarg + minlen) > strend || stringarg < strbeg) {
+ DEBUG_r(PerlIO_printf(Perl_debug_log,
+ "Regex match can't succeed, so not even tried\n"));
+ return 0;
+ }
+
+ startpos = stringarg;
+
if ((RX_EXTFLAGS(rx) & RXf_USE_INTUIT)
&& !(flags & REXEC_CHECKED))
{
@@ -2279,7 +2314,6 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend,
oldsave = PL_savestack_ix;
multiline = prog->extflags & RXf_PMf_MULTILINE;
- minlen = prog->minlen;
if (strend - startpos < (minlen+(prog->check_offset_min<0?prog->check_offset_min:0))) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
diff --git a/regexp.h b/regexp.h
index a86963c020..5fb85ec684 100644
--- a/regexp.h
+++ b/regexp.h
@@ -501,6 +501,9 @@ get_regex_charset_name(const U32 flags, STRLEN* const lenp)
#define RX_LASTPAREN(prog) (ReANY(prog)->lastparen)
#define RX_LASTCLOSEPAREN(prog) (ReANY(prog)->lastcloseparen)
#define RX_SAVED_COPY(prog) (ReANY(prog)->saved_copy)
+/* last match was zero-length */
+#define RX_ZERO_LEN(prog) \
+ (RX_OFFS(prog)[0].start + RX_GOFS(prog) == (UV)RX_OFFS(prog)[0].end)
#endif /* PLUGGABLE_RE_EXTENSION */