diff options
author | David Mitchell <davem@iabyn.com> | 2014-03-10 17:53:48 +0000 |
---|---|---|
committer | David Mitchell <davem@iabyn.com> | 2014-03-16 18:03:50 +0000 |
commit | c43b55202cde0bd54e79e98bc90ceaecb135c151 (patch) | |
tree | 2ef609f00befd6a023bc19125f54f1ef78f57449 /regexec.c | |
parent | 83f2232dc70fcf91e6341ae35e5eb6cdc7f2365e (diff) | |
download | perl-c43b55202cde0bd54e79e98bc90ceaecb135c151.tar.gz |
re_intuit_start(): eliminate checked_upto var
This var is supposed to record the latest place that stclass
had rejected, so if we try again, we don't start any earlier than that.
However, an assert showed that nothing in the test suite ever left
checked_upto > rx_origin on re-entry, so we could always use rx_origin
directly.
On failure, checked_upto is reset to HOPBACKc(endpos, start_shift)
where endpos is the highest char that find_byclass() was asked to search
to. Now, I think that this is a logical error; it should have been
HOPBACKc(endpos, cl_l) or similar; i.e. just hop back the number of chars
equal to the length of the character class; using start_shift just makes
checked_upto artificially too small.
But in either case, consider the following more formal analysis (the
arithmetic below is in terms of chars).
Assume initially at least, that checked_upto <= rx_origin. The question is
whether we can ever end up with checked_upto "overtaking" rx_origin.
Where there is an anchored substring or ml_anch, we have:
endpos = rx_origin + cl_l;
on failure,
checked_upto = endpos - (start_shift or cl_l)
since start_shift should be >= cl_l, in either case we end up with
checked_upto <= rx_origin.
Where there is only a floating substring, we have:
endpos = rx_origin - start_shift + cl_l;
on failure,
checked_upto = endpos - (start_shift or cl_l)
again, since start_shift should be >= cl_l, in either case we end up with
checked_upto <= rx_origin.
Where there are no substrings, we return on failure; so the updating of
checked_upto is irrelevant.
On success we return, so again the updating of checked_upto is irrelevant.
Diffstat (limited to 'regexec.c')
-rw-r--r-- | regexec.c | 15 |
1 files changed, 3 insertions, 12 deletions
@@ -646,7 +646,6 @@ Perl_re_intuit_start(pTHX_ bool ml_anch = 0; char *other_last = strpos;/* latest pos 'other' substr already checked to */ char *check_at = NULL; /* check substr found at this pos */ - char *checked_upto = NULL; /* how far into the string we have already checked using find_byclass*/ const I32 multiline = prog->extflags & RXf_PMf_MULTILINE; RXi_GET_DECL(prog,progi); regmatch_info reginfo_buf; /* create some info to pass to find_byclass */ @@ -1203,16 +1202,13 @@ Perl_re_intuit_start(pTHX_ else endpos= strend; - if (checked_upto < rx_origin) - checked_upto = rx_origin; DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " looking for class: start_shift: %"IVdf" check_at: %"IVdf - " rx_origin: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n", + " rx_origin: %"IVdf" endpos: %"IVdf"\n", (IV)start_shift, (IV)(check_at - strbeg), - (IV)(rx_origin - strbeg), (IV)(endpos - strbeg), - (IV)(checked_upto- strbeg))); + (IV)(rx_origin - strbeg), (IV)(endpos - strbeg))); - s = find_byclass(prog, progi->regstclass, checked_upto, endpos, + s = find_byclass(prog, progi->regstclass, rx_origin, endpos, reginfo); if (!s) { if (endpos == strend) { @@ -1224,9 +1220,6 @@ Perl_re_intuit_start(pTHX_ " This position contradicts STCLASS...\n") ); if ((prog->intflags & PREGf_ANCH) && !ml_anch) goto fail; - checked_upto = HOPBACKc(endpos, start_shift); - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " start_shift: %"IVdf" check_at: %"IVdf" endpos: %"IVdf" checked_upto: %"IVdf"\n", - (IV)start_shift, (IV)(check_at - strbeg), (IV)(endpos - strbeg), (IV)(checked_upto- strbeg))); /* Contradict one of substrings */ if (prog->anchored_substr || prog->anchored_utf8) { @@ -1279,8 +1272,6 @@ Perl_re_intuit_start(pTHX_ goto restart; } - checked_upto = s; - if (rx_origin != s) { DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " By STCLASS: moving %ld --> %ld\n", |