diff options
author | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2018-01-01 14:54:06 +0000 |
---|---|---|
committer | ph10 <ph10@6239d852-aaf2-0410-a92c-79f79f948069> | 2018-01-01 14:54:06 +0000 |
commit | 1b5d77c6edc5ee8e8fe5c96bf9cad5798d6ce36c (patch) | |
tree | aed17b13b53c9fb22bf36e0aacae86b59fe55e8b /src/pcre2_dfa_match.c | |
parent | 5af1eb2d55bb06fde6084fa3cb6e3219b932ec2b (diff) | |
download | pcre2-1b5d77c6edc5ee8e8fe5c96bf9cad5798d6ce36c.tar.gz |
Previous FIRSTLINE patch was broken. Fix it.
git-svn-id: svn://vcs.exim.org/pcre2/code/trunk@900 6239d852-aaf2-0410-a92c-79f79f948069
Diffstat (limited to 'src/pcre2_dfa_match.c')
-rw-r--r-- | src/pcre2_dfa_match.c | 27 |
1 files changed, 11 insertions, 16 deletions
```diff
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 9c1d805..65243bf 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -3363,8 +3363,6 @@ for (;;)
   if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
       (options & PCRE2_DFA_RESTART) == 0)
     {
-    PCRE2_SPTR save_end_subject = end_subject;
-
     /* If firstline is TRUE, the start of the match is constrained to the first
     line of a multiline string. That is, the match must be before or at the first
     newline following the start of matching. Temporarily adjust
@@ -3388,13 +3386,6 @@ for (;;)
       else
 #endif
       while (t < end_subject && !IS_NEWLINE(t)) t++;
-
-      /* Note that we only need to advance by one code unit if we found a
-      newline. If the newline is CRLF, a first code unit of LF should not
-      match, because it is not at or before the newline. Similarly, only the
-      first code unit of a Unicode newline might be relevant. */
-
-      if (t < end_subject) t++;
       end_subject = t;
       }
 
@@ -3466,14 +3457,18 @@ for (;;)
 #endif
           }
 
-        /* If we can't find the required code unit, break the bumpalong loop,
-        to force a match failure, except when doing partial matching, when we
-        let the next cycle run at the end of the subject. To see why, consider
-        the pattern /(?<=abc)def/, which partially matches "abc", even though
-        the string does not contain the starting character "d". */
+        /* If we can't find the required code unit, having reached the true end
+        of the subject, break the bumpalong loop, to force a match failure,
+        except when doing partial matching, when we let the next cycle run at
+        the end of the subject. To see why, consider the pattern /(?<=abc)def/,
+        which partially matches "abc", even though the string does not contain
+        the starting character "d". If we have not reached the true end of the
+        subject (PCRE2_FIRSTLINE caused end_subject to be temporarily modified)
+        we also let the cycle run, because the matching string is legitimately
+        allowed to start with the first code unit of a newline. */
 
         if ((mb->moptions & (PCRE2_PARTIAL_HARD|PCRE2_PARTIAL_SOFT)) == 0 &&
-            start_match >= end_subject)
+            start_match >= mb->end_subject)
           break;
         }
 
@@ -3532,7 +3527,7 @@ for (;;)
 
     /* Restore fudged end_subject */
 
-    end_subject = save_end_subject;
+    end_subject = mb->end_subject;
 
     /* The following two optimizations are disabled for partial matching. */
 
```