Previous fix for pcretest was buggy. Try again.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@143 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-04-02 10:08:14 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-04-02 10:08:14 +0000
commit: f8735bf7b631d53067a040bd9f2c04b8f80c6dae (patch)
tree: aa7ebef3c4d31210c14705eaeed55a0716a37f5a
parent: d0fc62ee8e85255467ef8541458df6e7f4e01cef (diff)
download: pcre-f8735bf7b631d53067a040bd9f2c04b8f80c6dae.tar.gz
6 files changed, 68 insertions, 25 deletions
diff --git a/ChangeLog b/ChangeLog
index 3ba3ac5..0929836 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -133,7 +133,8 @@ Version 7.1 12-Mar-07
 19. In pcretest, if the pattern /(?m)^$/g<any> was matched against the string
     "abc\r\n\r\n", it found an unwanted second match after the second \r. This
     was because its rules for how to advance for /g after matching an empty
-    string did not allow for this case. They now check for it specially.
+    string at the end of a line did not allow for this case. They now check for
+    it specially.
 
 
 Version 7.0 19-Dec-06
diff --git a/pcretest.c b/pcretest.c
index 0f3017d..a4af200 100644
--- a/pcretest.c
+++ b/pcretest.c
@@ -1972,7 +1972,6 @@ while (!done)
 
     for (;; gmatched++)    /* Loop for /g or /G */
       {
-      int gany_fudge;
       if (timeitm > 0)
         {
         register int i;
@@ -2212,11 +2211,18 @@ while (!done)
         }
 
       /* Failed to match. If this is a /g or /G loop and we previously set
-      g_notempty after a null match, this is not necessarily the end.
-      We want to advance the start offset, and continue. In the case of UTF-8
-      matching, the advance must be one character, not one byte. Fudge the
-      offset values to achieve this. We won't be at the end of the string -
-      that was checked before setting g_notempty. */
+      g_notempty after a null match, this is not necessarily the end. We want
+      to advance the start offset, and continue. We won't be at the end of the
+      string - that was checked before setting g_notempty.
+
+      A complication arises when the newline option is "any". If the
+      previous match was at the end of a line terminated by CRLF, an advance
+      of one character passes only the \r, whereas we should prefer the
+      longer newline sequence, as does the code in pcre_exec(). Fudge the
+      offset value to skip both characters.
+
+      Otherwise, in the case of UTF-8 matching, the advance must be one
+      character, not one byte. */
 
       else
         {
@@ -2224,7 +2230,13 @@ while (!done)
           {
           int onechar = 1;
           use_offsets[0] = start_offset;
-          if (use_utf8)
+          if ((((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == 
+                  PCRE_NEWLINE_ANY &&
+              start_offset < len - 1 &&
+              bptr[start_offset] == '\r' &&
+              bptr[start_offset+1] == '\n')
+            onechar++;   
+          else if (use_utf8)
             {
             while (start_offset + onechar < len)
               {
@@ -2256,39 +2268,26 @@ while (!done)
       what Perl's /g options does. This turns out to be rather cunning. First
       we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
       same point. If this fails (picked up above) we advance to the next
-      character.
-
-      Yet more complication arises in the case when the newline option is
-      "any" and a pattern in multiline mode has to match at the start of a
-      line. If a previous match was at the end of a line, and advance of one
-      character just passes the \r, whereas we should prefer the longer newline
-      sequence, as does the code in pcre_exec(). So we fudge it. */
+      character. */
 
       g_notempty = 0;
-      gany_fudge = 0;
 
       if (use_offsets[0] == use_offsets[1])
         {
         if (use_offsets[0] == len) break;
         g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
-        if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 &&
-            (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
-            use_offsets[0] < len - 1 &&
-            bptr[use_offsets[0]] == '\r' &&
-            bptr[use_offsets[0]+1] == '\n')
-          gany_fudge = 1;
         }
 
       /* For /g, update the start offset, leaving the rest alone */
 
-      if (do_g) start_offset = use_offsets[1] + gany_fudge;
+      if (do_g) start_offset = use_offsets[1];
 
       /* For /G, update the pointer and length */
 
       else
         {
-        bptr += use_offsets[1] + gany_fudge;
-        len -= use_offsets[1] + gany_fudge;
+        bptr += use_offsets[1];
+        len -= use_offsets[1];
         }
       }  /* End of loop for /g and /G */
 
diff --git a/testdata/testinput2 b/testdata/testinput2
index 1de01cd..dcfa77f 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -2139,4 +2139,10 @@ a random value. /Ix
 /(?m)^$/<any>g+
     abc\r\n\r\n
 
+/(?m)^$|^\r\n/<any>g+ 
+    abc\r\n\r\n
+    
+/(?m)$/<any>g+ 
+    abc\r\n\r\n
+
 / End of testinput2 /
diff --git a/testdata/testinput7 b/testdata/testinput7
index 4f1dbc5..5c2dd6f 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -4243,4 +4243,10 @@
 /(?m)^$/<any>g+
     abc\r\n\r\n
 
+/(?m)^$|^\r\n/<any>g+ 
+    abc\r\n\r\n
+    
+/(?m)$/<any>g+ 
+    abc\r\n\r\n
+
 / End of testinput7 /
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 56806b6..222b8ef 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -8141,4 +8141,20 @@ No match
  0: 
  0+ \x0d\x0a
 
+/(?m)^$|^\r\n/<any>g+ 
+    abc\r\n\r\n
+ 0: 
+ 0+ \x0d\x0a
+ 0: \x0d\x0a
+ 0+ 
+    
+/(?m)$/<any>g+ 
+    abc\r\n\r\n
+ 0: 
+ 0+ \x0d\x0a\x0d\x0a
+ 0: 
+ 0+ \x0d\x0a
+ 0: 
+ 0+ 
+
 / End of testinput2 /
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index f4abe04..6860b66 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -6975,4 +6975,19 @@ No match
  0: 
  0+ \x0d\x0a
 
+/(?m)^$|^\r\n/<any>g+ 
+    abc\r\n\r\n
+ 0: \x0d\x0a
+ 0+ 
+ 1: 
+    
+/(?m)$/<any>g+ 
+    abc\r\n\r\n
+ 0: 
+ 0+ \x0d\x0a\x0d\x0a
+ 0: 
+ 0+ \x0d\x0a
+ 0: 
+ 0+ 
+
 / End of testinput7 /
author	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2007-04-02 10:08:14 +0000
committer	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2007-04-02 10:08:14 +0000
commit	f8735bf7b631d53067a040bd9f2c04b8f80c6dae (patch)
tree	aa7ebef3c4d31210c14705eaeed55a0716a37f5a
parent	d0fc62ee8e85255467ef8541458df6e7f4e01cef (diff)
download	pcre-f8735bf7b631d53067a040bd9f2c04b8f80c6dae.tar.gz