summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Mitchell <davem@iabyn.com>2014-02-04 20:26:20 +0000
committerDavid Mitchell <davem@iabyn.com>2014-02-08 13:50:22 +0000
commite3c6feb028015ee55e45cf6a3be0176069fdeb3b (patch)
treeef577b9344e402051336694b620e02caea6743ec
parentb52b7737b2243b234d5171a27622937feb18c422 (diff)
downloadperl-e3c6feb028015ee55e45cf6a3be0176069fdeb3b.tar.gz
re_intuit_start(): rearrange /^/m code
After matching the "check" and "other" strings, we check that rx_origin is at a \n in the presence of /^../m. The code that does this is in one half of an if-statement, with a couple of labels and gotos that get us to and from the other half of the if statement. Re-arrange the code so that the /^../m is done on its own before the if. This removes a couple of labels and gotos and makes the code clearer. Basically we went from: if (rx_origin != strpos) { if (ml_anch && COND_A) { find_anchor: LOOK_FOR_ANCHOR... } REST_A; } else { if (ml_anch && COND_B) { goto find_anchor; } REST_B; } to: if (rx_origin != strpos && (ml_anch && COND_A) || rx_origin == strpos && (ml_anch && COND_B)) { find_anchor: LOOK_FOR_ANCHOR... } ... } if (rx_origin != strpos) { REST_A; else { REST_B; } The next couple of commits will re-indent and simplify the condition a bit.
-rw-r--r--regexec.c60
1 files changed, 31 insertions, 29 deletions
diff --git a/regexec.c b/regexec.c
index 4a9a82d37e..5ad1dcf9dc 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1086,20 +1086,25 @@ Perl_re_intuit_start(pTHX_
}
postprocess_substr_matches:
- if (rx_origin != strpos) {
- /* Fixed substring is found far enough so that the match
- cannot start at strpos. */
- char *t;
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " try at offset...\n"));
- if (ml_anch && rx_origin[-1] != '\n') {
+ /* handle the extra constraint of /^/m */
+
+ if ( ((rx_origin != strpos) && (ml_anch && rx_origin[-1] != '\n'))
+ || ((rx_origin == strpos) &&
+ (ml_anch && (strpos != strbeg) && strpos[-1] != '\n'
+ /* May be due to an implicit anchor of m{.*foo} */
+ && !(prog->intflags & PREGf_IMPLICIT))))
+ {
+ char *t;
+
/* Eventually fbm_*() should handle this, but often
anchored_offset is not 0, so this check will not be wasted. */
/* XXXX In the code below we prefer to look for "^" even in
presence of anchored substrings. And we search even
beyond the found float position. These pessimizations
are historical artefacts only. */
- find_anchor:
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
+ " looking for /^/m anchor"));
t = rx_origin;
while (t < strend - prog->minlen) {
if (*t == '\n') {
@@ -1123,7 +1128,7 @@ Perl_re_intuit_start(pTHX_
s = t + 1;
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Found /%s^%s/m at offset %ld...\n",
PL_colors[0], PL_colors[1], (long)(s - i_strpos)));
- goto set_useful;
+ break; /* success: found anchor */
}
/* Position contradicts check-string */
/* XXXX probably better to look for check-string
@@ -1135,16 +1140,26 @@ Perl_re_intuit_start(pTHX_
}
t++;
}
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Did not find /%s^%s/m...\n",
+ if (t >= strend - prog->minlen) {
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Did not find /%s^%s/m...\n",
PL_colors[0], PL_colors[1]));
- goto fail_finish;
- }
- else {
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Starting position does not contradict /%s^%s/m...\n",
- PL_colors[0], PL_colors[1]));
- }
+ goto fail_finish;
+ }
+ }
+ else {
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " Starting position does not contradict /%s^%s/m...\n",
+ PL_colors[0], PL_colors[1]));
+ }
+
+
+ /* Decide whether using the substrings helped */
+
+ if (rx_origin != strpos) {
+ /* Fixed substring is found far enough so that the match
+ cannot start at strpos. */
+
+ DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " try at offset...\n"));
s = rx_origin;
- set_useful:
++BmUSEFUL(utf8_target ? prog->check_utf8 : prog->check_substr); /* hooray/5 */
}
else {
@@ -1152,19 +1167,6 @@ Perl_re_intuit_start(pTHX_
- no optimization of calling REx engine can be performed,
unless it was an MBOL and we are not after MBOL,
or a future STCLASS check will fail this. */
- DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, " try at start...\n"));
- /* Even in this situation we may use MBOL flag if strpos is offset
- wrt the start of the string. */
- if (ml_anch && (strpos != strbeg) && strpos[-1] != '\n'
- /* May be due to an implicit anchor of m{.*foo} */
- && !(prog->intflags & PREGf_IMPLICIT))
- {
- goto find_anchor;
- }
- DEBUG_EXECUTE_r( if (ml_anch)
- PerlIO_printf(Perl_debug_log, " Position at offset %ld does not contradict /%s^%s/m...\n",
- (long)(strpos - i_strpos), PL_colors[0], PL_colors[1]);
- );
success_at_start:
if (!(prog->intflags & PREGf_NAUGHTY) /* XXXX If strpos moved? */
&& (utf8_target ? (