summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2013-12-27 12:23:25 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2013-12-27 12:23:25 +0000
commit5b52324dc797364680c782b151f7d0f8aa8618ad (patch)
tree56011d0a72aea26b87224dccb9b1cbbe456483de
parent52788c0f63139de170cfe1b3769fa1d5a97d9147 (diff)
downloadpcre-5b52324dc797364680c782b151f7d0f8aa8618ad.tar.gz
Fix pcretest's handling of patterns when \K in an assertion sets the start of a
match past the end of the match. git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1418 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog6
-rw-r--r--doc/pcrepattern.38
-rw-r--r--doc/pcresyntax.36
-rw-r--r--pcretest.c21
-rw-r--r--testdata/testinput23
-rw-r--r--testdata/testoutput26
6 files changed, 42 insertions, 8 deletions
diff --git a/ChangeLog b/ChangeLog
index 62fbcbe..b668574 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -14,6 +14,12 @@ Version 8.35-RC1 xx-xxxx-201x
3. Got rid of some compiler warnings for potentially uninitialized variables
that show up only when compiled with -O2.
+
+4. A pattern such as (?=ab\K) that uses \K in an assertion can set the start
+ of a match later than the end of the match. The pcretest program was not
+ handling the case sensibly - it was outputting from the start to the next
+ binary zero. It now reports this situation in a message, and outputs the
+ text from the end to the start.
Version 8.34 15-December-2013
diff --git a/doc/pcrepattern.3 b/doc/pcrepattern.3
index 4c515f8..86299d2 100644
--- a/doc/pcrepattern.3
+++ b/doc/pcrepattern.3
@@ -1,4 +1,4 @@
-.TH PCREPATTERN 3 "03 December 2013" "PCRE 8.34"
+.TH PCREPATTERN 3 "27 December 2013" "PCRE 8.35"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION DETAILS"
@@ -1004,7 +1004,9 @@ matches "foobar", the first substring is still set to "foo".
.P
Perl documents that the use of \eK within assertions is "not well defined". In
PCRE, \eK is acted upon when it occurs inside positive assertions, but is
-ignored in negative assertions.
+ignored in negative assertions. Note that when a pattern such as (?=ab\eK)
+matches, the reported start of the match can be greater than the end of the
+match.
.
.
.\" HTML <a name="smallassertions"></a>
@@ -3255,6 +3257,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 03 December 2013
+Last updated: 27 December 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/doc/pcresyntax.3 b/doc/pcresyntax.3
index 87f0cea..be442f4 100644
--- a/doc/pcresyntax.3
+++ b/doc/pcresyntax.3
@@ -1,4 +1,4 @@
-.TH PCRESYNTAX 3 "12 November 2013" "PCRE 8.34"
+.TH PCRESYNTAX 3 "27 December 2013" "PCRE 8.35"
.SH NAME
PCRE - Perl-compatible regular expressions
.SH "PCRE REGULAR EXPRESSION SYNTAX SUMMARY"
@@ -309,6 +309,8 @@ but some of them use Unicode properties if PCRE_UCP is set. You can use
.rs
.sp
\eK reset start of match
+.sp
+\eK is honoured in positive assertions, but ignored in negative ones.
.
.
.SH "ALTERNATION"
@@ -508,6 +510,6 @@ Cambridge CB2 3QH, England.
.rs
.sp
.nf
-Last updated: 12 November 2013
+Last updated: 27 December 2013
Copyright (c) 1997-2013 University of Cambridge.
.fi
diff --git a/pcretest.c b/pcretest.c
index 8452d2b..782c3f7 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -5192,7 +5192,8 @@ while (!done)
if (count * 2 > use_size_offsets) count = use_size_offsets/2;
}
- /* Output the captured substrings */
+ /* Output the captured substrings. Note that, for the matched string,
+ the use of \K in an assertion can make the start later than the end. */
for (i = 0; i < count * 2; i += 2)
{
@@ -5208,11 +5209,25 @@ while (!done)
}
else
{
+ int start = use_offsets[i];
+ int end = use_offsets[i+1];
+
+ if (start > end)
+ {
+ start = use_offsets[i+1];
+ end = use_offsets[i];
+ fprintf(outfile, "Start of matched string is beyond its end - "
+ "displaying from end to start.\n");
+ }
+
fprintf(outfile, "%2d: ", i/2);
- PCHARSV(bptr, use_offsets[i],
- use_offsets[i+1] - use_offsets[i], outfile);
+ PCHARSV(bptr, start, end - start, outfile);
if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
fprintf(outfile, "\n");
+
+ /* Note: don't use the start/end variables here because we want to
+ show the text from what is reported as the end. */
+
if (do_showcaprest || (i == 0 && do_showrest))
{
fprintf(outfile, "%2d+ ", i/2);
diff --git a/testdata/testinput2 b/testdata/testinput2
index 00924ee..072650c 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4045,4 +4045,7 @@ backtracking verbs. --/
/[a[:<:]] should give error/
+/(?=ab\K)/+
+ abcd
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 844497a..70e7ceb 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14125,4 +14125,10 @@ No match
/[a[:<:]] should give error/
Failed: unknown POSIX class name at offset 4
+/(?=ab\K)/+
+ abcd
+Start of matched string is beyond its end - displaying from end to start.
+ 0: ab
+ 0+ abcd
+
/-- End of testinput2 --/