summaryrefslogtreecommitdiff
path: root/pp_hot.c
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2013-06-16 13:26:30 +0100
committerDavid Mitchell <davem@iabyn.com>2013-07-28 10:33:34 +0100
commit4d2e8fb573b7e5488f92e2fcdad09d15b54e35c3 (patch)
tree2c39f4290f9090bced10d107b16ec0f8271c1cb9 /pp_hot.c
parent60165aa4bc7ef12661c54168e5dd07abb3881564 (diff)
downloadperl-4d2e8fb573b7e5488f92e2fcdad09d15b54e35c3.tar.gz
re-enable intuit-only matches
The COW changes inadvertently disabled intuit-only matches. These are where calling intuit_start() to find the starting point for a match is enough to know that the whole pattern will match, and so you can skip calling regexec() too. For example, fixed strings without captures such as /abc/. The COW breakage meant that regexec was always called, making something like /abc/ abut 3 times slower. This commit re-enables intuit-only matches. However, it turns out that this opens up a can of worms. Normally, recording the just-matched-against string so that things like $& and captures work, is done within regexec(). When this is skipped, pp_match has to do a similar thing itself. The code that does this (which is in principle a copy of the code in regexec()) is a bit of a mess. Due to a logic error, a big chunk of it has actually been dead code for 10+ years. Its had lots of modifications (e.g. people have made the same changes to regexec() and pp_match()), but since it never gets executed, errors aren't detected. And the bits that are executed haven't completely received all the COW and SAWAMERSAND updates that have happened recently. The Best way to fix this is is to extract out the capture code in regexec() into a separate function (which we did in the previous commit), then throw away all the broken capture code in pp_match() and replace it with a call to the new function (which this commit does). One side effect of this commit is that as well as restoring intuit-only behaviour for the patterns that used to pre-COW, it also enables this behaviour for patterns which formerly didn't, namely where $& or //p are seen. This commit is the barest minimum necessary to fix this; subsequent commits will clean and improve this.
Diffstat (limited to 'pp_hot.c')
-rw-r--r--pp_hot.c77
1 files changed, 15 insertions, 62 deletions
diff --git a/pp_hot.c b/pp_hot.c
index f3ed6d5138..14f5acab18 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -1439,13 +1439,9 @@ PP(pp_match)
if (!s)
goto nope;
-#ifdef PERL_SAWAMPERSAND
if ( (RX_EXTFLAGS(rx) & RXf_CHECK_ALL)
- && !PL_sawampersand
- && !(RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY)
&& !SvROK(TARG)) /* Cannot trust since INTUIT cannot guess ^ */
goto yup;
-#endif
}
if (!CALLREGEXEC(rx, (char*)s, (char *)strend, (char*)truebase,
minmatch, TARG, NUM2PTR(void*, gpos), r_flags))
@@ -1531,9 +1527,8 @@ PP(pp_match)
RETPUSHYES;
}
-#ifdef PERL_SAWAMPERSAND
yup: /* Confirmed by INTUIT */
-#endif
+ assert(!RX_NPARENS(rx));
if (rxtainted)
RX_MATCH_TAINTED_on(rx);
TAINT_IF(RX_MATCH_TAINTED(rx));
@@ -1545,67 +1540,25 @@ yup: /* Confirmed by INTUIT */
dynpm->op_pmflags |= PMf_USED;
#endif
}
- if (RX_MATCH_COPIED(rx))
- Safefree(RX_SUBBEG(rx));
- RX_MATCH_COPIED_off(rx);
- RX_SUBBEG(rx) = NULL;
+
+ RX_MATCH_UTF8_set(rx, cBOOL(DO_UTF8(rx)));
+ if ( !(r_flags & REXEC_NOT_FIRST) )
+ Perl_reg_set_capture_string(aTHX_ rx,
+ (char*)truebase, (char *)strend,
+ TARG, r_flags, cBOOL(DO_UTF8(TARG)));
+
+ /* skipping regexec means that indices for $&, $-[0] etc weren't set */
+ RX_OFFS(rx)[0].start = s - truebase;
+ RX_OFFS(rx)[0].end =
+ RX_MATCH_UTF8(rx)
+ ? (char*)utf8_hop((U8*)s, RX_MINLENRET(rx)) - truebase
+ : s - truebase + RX_MINLENRET(rx);
+
if (global) {
- /* FIXME - should rx->subbeg be const char *? */
- RX_SUBBEG(rx) = (char *) truebase;
- RX_SUBOFFSET(rx) = 0;
- RX_SUBCOFFSET(rx) = 0;
- RX_OFFS(rx)[0].start = s - truebase;
- if (RX_MATCH_UTF8(rx)) {
- char * const t = (char*)utf8_hop((U8*)s, RX_MINLENRET(rx));
- RX_OFFS(rx)[0].end = t - truebase;
- }
- else {
- RX_OFFS(rx)[0].end = s - truebase + RX_MINLENRET(rx);
- }
- RX_SUBLEN(rx) = strend - truebase;
goto gotcha;
}
-#ifdef PERL_SAWAMPERSAND
- if (PL_sawampersand || RX_EXTFLAGS(rx) & RXf_PMf_KEEPCOPY)
-#endif
- {
- I32 off;
-#ifdef PERL_ANY_COW
- if (SvCANCOW(TARG)) {
- if (DEBUG_C_TEST) {
- PerlIO_printf(Perl_debug_log,
- "Copy on write: pp_match $& capture, type %d, truebase=%p, t=%p, difference %d\n",
- (int) SvTYPE(TARG), (void*)truebase, (void*)t,
- (int)(t-truebase));
- }
- RX_SAVED_COPY(rx) = sv_setsv_cow(RX_SAVED_COPY(rx), TARG);
- RX_SUBBEG(rx)
- = (char *) SvPVX_const(RX_SAVED_COPY(rx)) + (t - truebase);
- assert (SvPOKp(RX_SAVED_COPY(rx)));
- } else
-#endif
- {
- RX_SUBBEG(rx) = savepvn(t, strend - t);
-#ifdef PERL_ANY_COW
- RX_SAVED_COPY(rx) = NULL;
-#endif
- }
- RX_SUBLEN(rx) = strend - t;
- RX_SUBOFFSET(rx) = 0;
- RX_SUBCOFFSET(rx) = 0;
- RX_MATCH_COPIED_on(rx);
- off = RX_OFFS(rx)[0].start = s - t;
- RX_OFFS(rx)[0].end = off + RX_MINLENRET(rx);
- }
-#ifdef PERL_SAWAMPERSAND
- else { /* startp/endp are used by @- @+. */
- RX_OFFS(rx)[0].start = s - truebase;
- RX_OFFS(rx)[0].end = s - truebase + RX_MINLENRET(rx);
- }
-#endif
/* match via INTUIT shouldn't have any captures. Let @-, @+, $^N know */
- assert(!RX_NPARENS(rx));
RX_LASTPAREN(rx) = RX_LASTCLOSEPAREN(rx) = 0;
LEAVE_SCOPE(oldsave);
RETPUSHYES;