summary | refs | log | tree | commit | diff
path: root/src/pcre2_dfa_match.c
diff options
context:
space:
mode:
author: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2018-01-01 14:54:06 +0000
committer: ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> 2018-01-01 14:54:06 +0000
commit: 1b5d77c6edc5ee8e8fe5c96bf9cad5798d6ce36c (patch)
tree: aed17b13b53c9fb22bf36e0aacae86b59fe55e8b /src/pcre2_dfa_match.c
parent: 5af1eb2d55bb06fde6084fa3cb6e3219b932ec2b (diff)
download: pcre2-1b5d77c6edc5ee8e8fe5c96bf9cad5798d6ce36c.tar.gz
Previous FIRSTLINE patch was broken. Fix it.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@900 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_dfa_match.c')
-rw-r--r-- src/pcre2_dfa_match.c 27
1 files changed, 11 insertions, 16 deletions
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 9c1d805..65243bf 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -3363,8 +3363,6 @@ for (;;)
if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
(options & PCRE2_DFA_RESTART) == 0)
{
- PCRE2_SPTR save_end_subject = end_subject;
-
/* If firstline is TRUE, the start of the match is constrained to the first
line of a multiline string. That is, the match must be before or at the
first newline following the start of matching. Temporarily adjust
@@ -3388,13 +3386,6 @@ for (;;)
else
#endif
while (t < end_subject && !IS_NEWLINE(t)) t++;
-
- /* Note that we only need to advance by one code unit if we found a
- newline. If the newline is CRLF, a first code unit of LF should not
- match, because it is not at or before the newline. Similarly, only the
- first code unit of a Unicode newline might be relevant. */
-
- if (t < end_subject) t++;
end_subject = t;
}
@@ -3466,14 +3457,18 @@ for (;;)
#endif
}
- /* If we can't find the required code unit, break the bumpalong loop,
- to force a match failure, except when doing partial matching, when we
- let the next cycle run at the end of the subject. To see why, consider
- the pattern /(?<=abc)def/, which partially matches "abc", even though
- the string does not contain the starting character "d". */
+ /* If we can't find the required code unit, having reached the true end
+ of the subject, break the bumpalong loop, to force a match failure,
+ except when doing partial matching, when we let the next cycle run at
+ the end of the subject. To see why, consider the pattern /(?<=abc)def/,
+ which partially matches "abc", even though the string does not contain
+ the starting character "d". If we have not reached the true end of the
+ subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
+ we also let the cycle run, because the matching string is legitimately
+ allowed to start with the first code unit of a newline. */
if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
- start_match >= end_subject)
+ start_match >= mb->end_subject)
break;
}
@@ -3532,7 +3527,7 @@ for (;;)
/* Restore fudged end_subject */
- end_subject = save_end_subject;
+ end_subject = mb->end_subject;
/* The following two optimizations are disabled for partial matching. */