summaryrefslogtreecommitdiff
path: root/regexec.c
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2014-03-10 17:53:48 +0000
committerDavid Mitchell <davem@iabyn.com>2014-03-16 18:03:50 +0000
commitc43b55202cde0bd54e79e98bc90ceaecb135c151 (patch)
tree2ef609f00befd6a023bc19125f54f1ef78f57449 /regexec.c
parent83f2232dc70fcf91e6341ae35e5eb6cdc7f2365e (diff)
downloadperl-c43b55202cde0bd54e79e98bc90ceaecb135c151.tar.gz
re_intuit_start(): eliminate checked_upto var
This var is supposed to record the latest place that stclass had rejected, so if we try again, we don't start any earlier than that. However, an assert showed that nothing in the test suite ever left checked_upto > rx_origin on re-entry, so we could always use rx_origin directly. On failure, checked_upto is reset to HOPBACKc(endpos, start_shift) where endpos is the highest char that find_byclass() was asked to search to. Now, I think that this is a logical error; it should have been HOPBACKc(endpos, cl_l) or similar; i.e. just hop back the number of chars equal to the length of the character class; using start_shift just makes checked_upto artificially too small. But in either case, consider the following more formal analysis (the arithmetic below is in terms of chars). Assume, initially at least, that checked_upto <= rx_origin. The question is whether we can ever end up with checked_upto "overtaking" rx_origin. Where there is an anchored substring or ml_anch, we have: endpos = rx_origin + cl_l; on failure, checked_upto = endpos - (start_shift or cl_l); since start_shift should be >= cl_l, in either case we end up with checked_upto <= rx_origin. Where there is only a floating substring, we have: endpos = rx_origin - start_shift + cl_l; on failure, checked_upto = endpos - (start_shift or cl_l); again, since start_shift should be >= cl_l, in either case we end up with checked_upto <= rx_origin. Where there are no substrings, we return on failure; so the updating of checked_upto is irrelevant. On success we return, so again the updating of checked_upto is irrelevant.
Diffstat (limited to 'regexec.c')
-rw-r--r--regexec.c15
1 files changed, 3 insertions, 12 deletions
diff --git a/regexec.c b/regexec.c
index 9868927ac6..2b9e3f6cc0 100644
--- a/regexec.c
+++ b/regexec.c
@@ -646,7 +646,6 @@ Perl_re_intuit_start(pTHX_
bool ml_anch = 0;
char *other_last = strpos;/* latest pos 'other' substr already checked to */
char *check_at = NULL; /* check substr found at this pos */
- char *checked_upto = NULL; /* how far into the string we have already checked using find_byclass*/
const I32 multiline = prog->extflags & RXf_PMf_MULTILINE;
RXi_GET_DECL(prog,progi);
regmatch_info reginfo_buf; /* create some info to pass to find_byclass */
@@ -1203,16 +1202,13 @@ Perl_re_intuit_start(pTHX_
else
endpos= strend;
- if (checked_upto < rx_origin)
- checked_upto = rx_origin;
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
" looking for class: start_shift: %"IVdf" check_at: %"IVdf
- " rx_origin: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
+ " rx_origin: %"IVdf" endpos: %"IVdf"\n",
(IV)start_shift, (IV)(check_at - strbeg),
- (IV)(rx_origin - strbeg), (IV)(endpos - strbeg),
- (IV)(checked_upto- strbeg)));
+ (IV)(rx_origin - strbeg), (IV)(endpos - strbeg)));
- s = find_byclass(prog, progi->regstclass, checked_upto, endpos,
+ s = find_byclass(prog, progi->regstclass, rx_origin, endpos,
reginfo);
if (!s) {
if (endpos == strend) {
@@ -1224,9 +1220,6 @@ Perl_re_intuit_start(pTHX_
" This position contradicts STCLASS...\n") );
if ((prog->intflags & PREGf_ANCH) && !ml_anch)
goto fail;
- checked_upto = HOPBACKc(endpos, start_shift);
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " start_shift: %"IVdf" check_at: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n",
- (IV)start_shift, (IV)(check_at - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg)));
/* Contradict one of substrings */
if (prog->anchored_substr || prog->anchored_utf8) {
@@ -1279,8 +1272,6 @@ Perl_re_intuit_start(pTHX_
goto restart;
}
- checked_upto = s;
-
if (rx_origin != s) {
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
" By STCLASS: moving %ld --> %ld\n",