From e3c6feb028015ee55e45cf6a3be0176069fdeb3b Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Tue, 4 Feb 2014 20:26:20 +0000 Subject: re_intuit_start(): rearrange /^/m code After matching the "check" and "other" strings, we check that rx_origin is at a \n in the presence of /^../m. The code that does this is in one half of an if-statement, with a couple of labels and gotos that get us to and from the other half of the if statement. Re-arrange the code so that the /^../m is done on its own before the if. This removes a couple of labels and gotos and makes the code clearer. Basically we went from: if (rx_origin != strpos) { if (ml_anch && COND_A) { find_anchor: LOOK_FOR_ANCHOR... } REST_A; } else { if (ml_anch && COND_B) { goto find_anchor; } REST_B; } to: if (rx_origin != strpos && (ml_anch && COND_A) || rx_origin == strpos && (ml_anch && COND_B)) { find_anchor: LOOK_FOR_ANCHOR... } ... } if (rx_origin != strpos) { REST_A; else { REST_B; } The next couple of commits will re-indent and simplify the condition a bit. --- regexec.c | 60 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'regexec.c') diff --git a/regexec.c b/regexec.c index 4a9a82d37e..5ad1dcf9dc 100644 --- a/regexec.c +++ b/regexec.c @@ -1086,20 +1086,25 @@ Perl_re_intuit_start(pTHX_ } postprocess_substr_matches: - if (rx_origin != strpos) { - /* Fixed substring is found far enough so that the match - cannot start at strpos. */ - char *t; - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " try at offset...\n")); - if (ml_anch && rx_origin[-1] != '\n') { + /* handle the extra constraint of /^/m */ + + if ( ((rx_origin != strpos) && (ml_anch && rx_origin[-1] != '\n')) + || ((rx_origin == strpos) && + (ml_anch && (strpos != strbeg) && strpos[-1] != '\n' + /* May be due to an implicit anchor of m{.*foo} */ + && !(prog->intflags & PREGf_IMPLICIT)))) + { + char *t; + /* Eventually fbm_*() should handle this, but often anchored_offset is not 0, so this check will not be wasted. */ /* XXXX In the code below we prefer to look for "^" even in presence of anchored substrings. And we search even beyond the found float position. These pessimizations are historical artefacts only. */ - find_anchor: + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, + " looking for /^/m anchor")); t = rx_origin; while (t < strend - prog->minlen) { if (*t == '\n') { @@ -1123,7 +1128,7 @@ Perl_re_intuit_start(pTHX_ s = t + 1; DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Found /%s^%s/m at offset %ld...\n", PL_colors[0], PL_colors[1], (long)(s - i_strpos))); - goto set_useful; + break; /* success: found anchor */ } /* Position contradicts check-string */ /* XXXX probably better to look for check-string @@ -1135,16 +1140,26 @@ Perl_re_intuit_start(pTHX_ } t++; } - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Did not find /%s^%s/m...\n", + if (t >= strend - prog->minlen) { + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Did not find /%s^%s/m...\n", PL_colors[0], PL_colors[1])); - goto fail_finish; - } - else { - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Starting position does not contradict /%s^%s/m...\n", - PL_colors[0], PL_colors[1])); - } + goto fail_finish; + } + } + else { + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Starting position does not contradict /%s^%s/m...\n", + PL_colors[0], PL_colors[1])); + } + + + /* Decide whether using the substrings helped */ + + if (rx_origin != strpos) { + /* Fixed substring is found far enough so that the match + cannot start at strpos. */ + + DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " try at offset...\n")); s = rx_origin; - set_useful: ++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */ } else { @@ -1152,19 +1167,6 @@ Perl_re_intuit_start(pTHX_ - no optimization of calling REx engine can be performed, unless it was an MBOL and we are not after MBOL, or a future STCLASS check will fail this. */ - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " try at start...\n")); - /* Even in this situation we may use MBOL flag if strpos is offset - wrt the start of the string. */ - if (ml_anch && (strpos != strbeg) && strpos[-1] != '\n' - /* May be due to an implicit anchor of m{.*foo} */ - && !(prog->intflags & PREGf_IMPLICIT)) - { - goto find_anchor; - } - DEBUG_EXECUTE_r( if (ml_anch) - PerlIO_printf(Perl_debug_log, " Position at offset %ld does not contradict /%s^%s/m...\n", - (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]); - ); success_at_start: if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */ && (utf8_target ? ( -- cgit v1.2.1