diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-12-27 12:23:25 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2013-12-27 12:23:25 +0000 |
commit | 5b52324dc797364680c782b151f7d0f8aa8618ad (patch) | |
tree | 56011d0a72aea26b87224dccb9b1cbbe456483de | |
parent | 52788c0f63139de170cfe1b3769fa1d5a97d9147 (diff) | |
download | pcre-5b52324dc797364680c782b151f7d0f8aa8618ad.tar.gz |
Fix pcretest's handling of patterns when \K in an assertion sets the start of a
match past the end of the match.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1418 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 6 | ||||
-rw-r--r-- | doc/pcrepattern.3 | 8 | ||||
-rw-r--r-- | doc/pcresyntax.3 | 6 | ||||
-rw-r--r-- | pcretest.c | 21 | ||||
-rw-r--r-- | testdata/testinput2 | 3 | ||||
-rw-r--r-- | testdata/testoutput2 | 6 |
6 files changed, 42 insertions, 8 deletions
@@ -14,6 +14,12 @@ Version 8.35-RC1 xx-xxxx-201x 3. Got rid of some compiler warnings for potentially uninitialized variables that show up only when compiled with -O2. + +4. A pattern such as (?=ab\K) that uses \K in an assertion can set the start + of a match later than the end of the match. The pcretest program was not + handling the case sensibly - it was outputting from the start to the next + binary zero. It now reports this situation in a message, and outputs the + text from the end to the start. Version 8.34 15-December-2013 diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3 index 4c515f8..86299d2 100644 --- a/doc/pcrepattern.3 +++ b/doc/pcrepattern.3 @@ -1,4 +1,4 @@ -.TH PCREPATTERN 3 "03 December 2013" "PCRE 8.34" +.TH PCREPATTERN 3 "27 December 2013" "PCRE 8.35" .SH NAME PCRE - Perl-compatible regular expressions .SH "PCRE REGULAR EXPRESSION DETAILS" @@ -1004,7 +1004,9 @@ matches "foobar", the first substring is still set to "foo". .P Perl documents that the use of \eK within assertions is "not well defined". In PCRE, \eK is acted upon when it occurs inside positive assertions, but is -ignored in negative assertions. +ignored in negative assertions. Note that when a pattern such as (?=ab\eK) +matches, the reported start of the match can be greater than the end of the +match. . . .\" HTML <a name="smallassertions"></a> @@ -3255,6 +3257,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 03 December 2013 +Last updated: 27 December 2013 Copyright (c) 1997-2013 University of Cambridge. .fi diff --git a/doc/pcresyntax.3 b/doc/pcresyntax.3 index 87f0cea..be442f4 100644 --- a/doc/pcresyntax.3 +++ b/doc/pcresyntax.3 @@ -1,4 +1,4 @@ -.TH PCRESYNTAX 3 "12 November 2013" "PCRE 8.34" +.TH PCRESYNTAX 3 "27 December 2013" "PCRE 8.35" .SH NAME PCRE - Perl-compatible regular expressions .SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY" @@ -309,6 +309,8 @@ but some of them use Unicode properties if PCRE_UCP is set. 
You can use .rs .sp \eK reset start of match +.sp +\eK is honoured in positive assertions, but ignored in negative ones. . . .SH "ALTERNATION" @@ -508,6 +510,6 @@ Cambridge CB2 3QH, England. .rs .sp .nf -Last updated: 12 November 2013 +Last updated: 27 December 2013 Copyright (c) 1997-2013 University of Cambridge. .fi @@ -5192,7 +5192,8 @@ while (!done) if (count * 2 > use_size_offsets) count = use_size_offsets/2; } - /* Output the captured substrings */ + /* Output the captured substrings. Note that, for the matched string, + the use of \K in an assertion can make the start later than the end. */ for (i = 0; i < count * 2; i += 2) { @@ -5208,11 +5209,25 @@ while (!done) } else { + int start = use_offsets[i]; + int end = use_offsets[i+1]; + + if (start > end) + { + start = use_offsets[i+1]; + end = use_offsets[i]; + fprintf(outfile, "Start of matched string is beyond its end - " + "displaying from end to start.\n"); + } + fprintf(outfile, "%2d: ", i/2); - PCHARSV(bptr, use_offsets[i], - use_offsets[i+1] - use_offsets[i], outfile); + PCHARSV(bptr, start, end - start, outfile); if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)"); fprintf(outfile, "\n"); + + /* Note: don't use the start/end variables here because we want to + show the text from what is reported as the end. */ + if (do_showcaprest || (i == 0 && do_showrest)) { fprintf(outfile, "%2d+ ", i/2); diff --git a/testdata/testinput2 b/testdata/testinput2 index 00924ee..072650c 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4045,4 +4045,7 @@ backtracking verbs. --/ /[a[:<:]] should give error/ +/(?=ab\K)/+ + abcd + /-- End of testinput2 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 844497a..70e7ceb 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14125,4 +14125,10 @@ No match /[a[:<:]] should give error/ Failed: unknown POSIX class name at offset 4 +/(?=ab\K)/+ + abcd +Start of matched string is beyond its end - displaying from end to start. 
+ 0: ab + 0+ abcd + /-- End of testinput2 --/ |