summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-04-02 10:08:14 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-04-02 10:08:14 +0000
commit: f8735bf7b631d53067a040bd9f2c04b8f80c6dae (patch)
tree: aa7ebef3c4d31210c14705eaeed55a0716a37f5a
parent: d0fc62ee8e85255467ef8541458df6e7f4e01cef (diff)
download: pcre-f8735bf7b631d53067a040bd9f2c04b8f80c6dae.tar.gz
Previous fix for pcretest was buggy. Try again.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@143 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 3
-rw-r--r-- pcretest.c 47
-rw-r--r-- testdata/testinput2 6
-rw-r--r-- testdata/testinput7 6
-rw-r--r-- testdata/testoutput2 16
-rw-r--r-- testdata/testoutput7 15
6 files changed, 68 insertions, 25 deletions
diff --git a/ChangeLog b/ChangeLog
index 3ba3ac5..0929836 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -133,7 +133,8 @@ Version 7.1 12-Mar-07
19. In pcretest, if the pattern /(?m)^$/g<any> was matched against the string
"abc\r\n\r\n", it found an unwanted second match after the second \r. This
was because its rules for how to advance for /g after matching an empty
- string did not allow for this case. They now check for it specially.
+ string at the end of a line did not allow for this case. They now check for
+ it specially.
Version 7.0 19-Dec-06
diff --git a/pcretest.c b/pcretest.c
index 0f3017d..a4af200 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -1972,7 +1972,6 @@ while (!done)
for (;; gmatched++) /* Loop for /g or /G */
{
- int gany_fudge;
if (timeitm > 0)
{
register int i;
@@ -2212,11 +2211,18 @@ while (!done)
}
/* Failed to match. If this is a /g or /G loop and we previously set
- g_notempty after a null match, this is not necessarily the end.
- We want to advance the start offset, and continue. In the case of UTF-8
- matching, the advance must be one character, not one byte. Fudge the
- offset values to achieve this. We won't be at the end of the string -
- that was checked before setting g_notempty. */
+ g_notempty after a null match, this is not necessarily the end. We want
+ to advance the start offset, and continue. We won't be at the end of the
+ string - that was checked before setting g_notempty.
+
+ Complication arises in the case when the newline option is "any".
+ If the previous match was at the end of a line terminated by CRLF, an
+ advance of one character just passes the \r, whereas we should prefer the
+ longer newline sequence, as does the code in pcre_exec(). Fudge the
+ offset value to achieve this.
+
+ Otherwise, in the case of UTF-8 matching, the advance must be one
+ character, not one byte. */
else
{
@@ -2224,7 +2230,13 @@ while (!done)
{
int onechar = 1;
use_offsets[0] = start_offset;
- if (use_utf8)
+ if ((((real_pcre *)re)->options & PCRE_NEWLINE_BITS) ==
+ PCRE_NEWLINE_ANY &&
+ start_offset < len - 1 &&
+ bptr[start_offset] == '\r' &&
+ bptr[start_offset+1] == '\n')
+ onechar++;
+ else if (use_utf8)
{
while (start_offset + onechar < len)
{
@@ -2256,39 +2268,26 @@ while (!done)
what Perl's /g options does. This turns out to be rather cunning. First
we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
same point. If this fails (picked up above) we advance to the next
- character.
-
- Yet more complication arises in the case when the newline option is
- "any" and a pattern in multiline mode has to match at the start of a
- line. If a previous match was at the end of a line, and advance of one
- character just passes the \r, whereas we should prefer the longer newline
- sequence, as does the code in pcre_exec(). So we fudge it. */
+ character. */
g_notempty = 0;
- gany_fudge = 0;
if (use_offsets[0] == use_offsets[1])
{
if (use_offsets[0] == len) break;
g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
- if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 &&
- (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
- use_offsets[0] < len - 1 &&
- bptr[use_offsets[0]] == '\r' &&
- bptr[use_offsets[0]+1] == '\n')
- gany_fudge = 1;
}
/* For /g, update the start offset, leaving the rest alone */
- if (do_g) start_offset = use_offsets[1] + gany_fudge;
+ if (do_g) start_offset = use_offsets[1];
/* For /G, update the pointer and length */
else
{
- bptr += use_offsets[1] + gany_fudge;
- len -= use_offsets[1] + gany_fudge;
+ bptr += use_offsets[1];
+ len -= use_offsets[1];
}
} /* End of loop for /g and /G */
diff --git a/testdata/testinput2 b/testdata/testinput2
index 1de01cd..dcfa77f 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2139,4 +2139,10 @@ a random value. /Ix
/(?m)^$/<any>g+
abc\r\n\r\n
+/(?m)^$|^\r\n/<any>g+
+ abc\r\n\r\n
+
+/(?m)$/<any>g+
+ abc\r\n\r\n
+
/ End of testinput2 /
diff --git a/testdata/testinput7 b/testdata/testinput7
index 4f1dbc5..5c2dd6f 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -4243,4 +4243,10 @@
/(?m)^$/<any>g+
abc\r\n\r\n
+/(?m)^$|^\r\n/<any>g+
+ abc\r\n\r\n
+
+/(?m)$/<any>g+
+ abc\r\n\r\n
+
/ End of testinput7 /
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 56806b6..222b8ef 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -8141,4 +8141,20 @@ No match
0:
0+ \x0d\x0a
+/(?m)^$|^\r\n/<any>g+
+ abc\r\n\r\n
+ 0:
+ 0+ \x0d\x0a
+ 0: \x0d\x0a
+ 0+
+
+/(?m)$/<any>g+
+ abc\r\n\r\n
+ 0:
+ 0+ \x0d\x0a\x0d\x0a
+ 0:
+ 0+ \x0d\x0a
+ 0:
+ 0+
+
/ End of testinput2 /
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index f4abe04..6860b66 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -6975,4 +6975,19 @@ No match
0:
0+ \x0d\x0a
+/(?m)^$|^\r\n/<any>g+
+ abc\r\n\r\n
+ 0: \x0d\x0a
+ 0+
+ 1:
+
+/(?m)$/<any>g+
+ abc\r\n\r\n
+ 0:
+ 0+ \x0d\x0a\x0d\x0a
+ 0:
+ 0+ \x0d\x0a
+ 0:
+ 0+
+
/ End of testinput7 /