Fix PCRE_PARTIAL_HARD for patterns that end optionally, e.g. abc*

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@462 2f5784b3-3f2a-0410-8824-cb99058d5e15
author: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2009-10-17 19:55:02 +0000
committer: ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2009-10-17 19:55:02 +0000
commit: 1da028459167ef408e659b9fe91eb70f3b79e395 (patch)
tree: 2fbe158f25c2f1fc68cb1532b6dd7cee33a7ec94
parent: f66c8de115b662c90e2a0af9a4357f69df2b3106 (diff)
download: pcre-1da028459167ef408e659b9fe91eb70f3b79e395.tar.gz
13 files changed, 533 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index 2885ebb..9e5d0a8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -58,8 +58,7 @@ Version 8.00 05-Oct-09
 10. Partial matching has been split into two forms: PCRE_PARTIAL_SOFT, which is
     synonymous with PCRE_PARTIAL, for backwards compatibility, and
     PCRE_PARTIAL_HARD, which causes a partial match to supersede a full match,
-    and may be more useful for multi-segment matching, especially with
-    pcre_exec().
+    and may be more useful for multi-segment matching.
 
 11. Partial matching with pcre_exec() is now more intuitive. A partial match
     used to be given if ever the end of the subject was reached; now it is
diff --git a/configure.ac b/configure.ac
index aed0d4c..2254be6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,8 +8,8 @@ dnl empty.
 
 m4_define(pcre_major, [8])
 m4_define(pcre_minor, [00])
-m4_define(pcre_prerelease, [-RC1])
-m4_define(pcre_date, [2009-10-05])
+m4_define(pcre_prerelease, [-RC2])
+m4_define(pcre_date, [2009-10-17])
 
 # Libtool shared library interface versions (current:revision:age)
 m4_define(libpcre_version, [0:1:0])
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 93a885e..458bb4c 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -109,8 +109,9 @@ never stored, so we push them well clear of the normal opcodes. */
 character that is to be tested in some way. This makes is possible to
 centralize the loading of these characters. In the case of Type * etc, the
 "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
-small value. ***NOTE*** If the start of this table is modified, the two tables
-that follow must also be modified. */
+small value. Non-zero values in the table are the offsets from the opcode where 
+the character is to be found. ***NOTE*** If the start of this table is
+modified, the three tables that follow must also be modified. */
 
 static const uschar coptable[] = {
   0,                             /* End                                    */
@@ -160,7 +161,64 @@ static const uschar coptable[] = {
   0,                             /* DEF                                    */
   0, 0,                          /* BRAZERO, BRAMINZERO                    */
   0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
-  0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
+  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
+};
+
+/* This table identifies those opcodes that inspect a character. It is used to 
+remember the fact that a character could have been inspected when the end of
+the subject is reached, in order to support PCRE_PARTIAL_HARD behaviour.
+***NOTE*** If the start of this table is modified, the two tables that follow
+must also be modified. */
+
+static const uschar poptable[] = {
+  0,                             /* End                                    */
+  0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
+  1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
+  1, 1, 1,                       /* Any, AllAny, Anybyte                   */
+  1, 1, 1,                       /* NOTPROP, PROP, EXTUNI                  */
+  1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
+  0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
+  1,                             /* Char                                   */
+  1,                             /* Charnc                                 */
+  1,                             /* not                                    */
+  /* Positive single-char repeats                                          */
+  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
+  1, 1, 1,                       /* upto, minupto, exact                   */
+  1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
+  /* Negative single-char repeats - only for chars < 256                   */
+  1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
+  1, 1, 1,                       /* NOT upto, minupto, exact               */
+  1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
+  /* Positive type repeats                                                 */
+  1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
+  1, 1, 1,                       /* Type upto, minupto, exact              */
+  1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
+  /* Character class & ref repeats                                         */
+  1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
+  1, 1,                          /* CRRANGE, CRMINRANGE                    */
+  1,                             /* CLASS                                  */
+  1,                             /* NCLASS                                 */
+  1,                             /* XCLASS - variable length               */
+  0,                             /* REF                                    */
+  0,                             /* RECURSE                                */
+  0,                             /* CALLOUT                                */
+  0,                             /* Alt                                    */
+  0,                             /* Ket                                    */
+  0,                             /* KetRmax                                */
+  0,                             /* KetRmin                                */
+  0,                             /* Assert                                 */
+  0,                             /* Assert not                             */
+  0,                             /* Assert behind                          */
+  0,                             /* Assert behind not                      */
+  0,                             /* Reverse                                */
+  0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
+  0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
+  0,                             /* CREF                                   */
+  0,                             /* RREF                                   */
+  0,                             /* DEF                                    */
+  0, 0,                          /* BRAZERO, BRAMINZERO                    */
+  0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
+  0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */
 };
 
 /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
@@ -489,6 +547,7 @@ for (;;)
   unsigned int c, d;
   int forced_fail = 0;
   int reached_end = 0;
+  BOOL could_continue = FALSE;
 
   /* Make the new state list into the active state list and empty the
   new state list. */
@@ -596,6 +655,12 @@ for (;;)
 
     code = start_code + state_offset;
     codevalue = *code;
+    
+    /* If this opcode inspects a character, but we are at the end of the 
+    subject, remember the fact so that we can support PCRE_PARTIAL_HARD. */
+
+    if (clen == 0 && poptable[codevalue] != 0)
+      could_continue = TRUE; 
 
     /* If this opcode is followed by an inline character, load it. It is
     tempting to test for the presence of a subject character here, but that
@@ -2522,16 +2587,24 @@ for (;;)
   /* We have finished the processing at the current subject character. If no
   new states have been set for the next character, we have found all the
   matches that we are going to find. If we are at the top level and partial
-  matching has been requested, check for appropriate conditions. The "forced_
-  fail" variable counts the number of (*F) encountered for the character. If it
-  is equal to the original active_count (saved in workspace[1]) it means that
-  (*F) was found on every active state. In this case we don't want to give a
-  partial match. */
+  matching has been requested, check for appropriate conditions. 
+  
+  The "forced_fail" variable counts the number of (*F) encountered for the
+  character. If it is equal to the original active_count (saved in
+  workspace[1]) it means that (*F) was found on every active state. In this
+  case we don't want to give a partial match. 
+  
+  The "reached_end" variable counts the number of threads that have reached the 
+  end of the pattern. The "could_continue" variable is true if a thread could 
+  have continued but for the fact that the end of the subject was reached. */
 
   if (new_count <= 0)
     {
     if (rlevel == 1 &&                               /* Top level, and */
-        reached_end != workspace[1] &&               /* Not all reached end */
+        (                                            /* either... */
+        reached_end != workspace[1] ||               /* Not all reached end */
+          could_continue                             /* or some could go on */
+        ) &&                                         /* and... */
         forced_fail != workspace[1] &&               /* Not all forced fail & */
         (                                            /* either... */
         (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
diff --git a/pcre_exec.c b/pcre_exec.c
index db1e926..f930095 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -415,7 +415,7 @@ the subject. */
     }
 
 #define SCHECK_PARTIAL()\
-  if (md->partial && eptr > mstart)\
+  if (md->partial != 0 && eptr > mstart)\
     {\
     md->hitend = TRUE;\
     if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
@@ -2146,7 +2146,11 @@ for (;;)
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (!match_ref(offset, eptr, length, md, ims)) break;
+          if (!match_ref(offset, eptr, length, md, ims)) 
+            {
+            CHECK_PARTIAL(); 
+            break;
+            } 
           eptr += length;
           }
         while (eptr >= pp)
@@ -2315,7 +2319,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c > 255)
               {
@@ -2341,7 +2349,11 @@ for (;;)
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if ((data[c/8] & (1 << (c&7))) == 0) break;
             eptr++;
@@ -2446,7 +2458,11 @@ for (;;)
         for (i = min; i < max; i++)
           {
           int len = 1;
-          if (eptr >= md->end_subject) break;
+          if (eptr >= md->end_subject) 
+            {
+            SCHECK_PARTIAL(); 
+            break;
+            } 
           GETCHARLENTEST(c, eptr, len);
           if (!_pcre_xclass(c, data)) break;
           eptr += len;
@@ -2685,7 +2701,11 @@ for (;;)
                      eptr <= md->end_subject - oclength &&
                      memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
 #endif  /* SUPPORT_UCP */
-            else break;
+            else 
+              {
+              CHECK_PARTIAL(); 
+              break;
+              } 
             }
 
           if (possessive) continue;
@@ -2763,7 +2783,12 @@ for (;;)
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
+          if (eptr >= md->end_subject) 
+            {
+            SCHECK_PARTIAL();
+            break;
+            } 
+          if (fc != md->lcc[*eptr]) break;
           eptr++;
           }
 
@@ -2817,7 +2842,12 @@ for (;;)
         pp = eptr;
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject || fc != *eptr) break;
+          if (eptr >= md->end_subject) 
+            {
+            SCHECK_PARTIAL(); 
+            break;
+            } 
+          if (fc != *eptr) break;
           eptr++;
           }
         if (possessive) continue;
@@ -3029,7 +3059,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(d, eptr, len);
             if (d < 256) d = md->lcc[d];
             if (fc == d) break;
@@ -3050,7 +3084,12 @@ for (;;)
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL();
+              break;
+              } 
+            if (fc == md->lcc[*eptr]) break;
             eptr++;
             }
           if (possessive) continue;
@@ -3159,7 +3198,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(d, eptr, len);
             if (fc == d) break;
             eptr += len;
@@ -3179,7 +3222,12 @@ for (;;)
           {
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || fc == *eptr) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
+            if (fc == *eptr) break;
             eptr++;
             }
           if (possessive) continue;
@@ -4335,7 +4383,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (prop_fail_result) break;
             eptr+= len;
@@ -4346,7 +4398,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == ucp_Lu ||
@@ -4361,7 +4417,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             prop_category = UCD_CATEGORY(c);
             if ((prop_category == prop_value) == prop_fail_result)
@@ -4374,7 +4434,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             prop_chartype = UCD_CHARTYPE(c);
             if ((prop_chartype == prop_value) == prop_fail_result)
@@ -4387,7 +4451,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             prop_script = UCD_SCRIPT(c);
             if ((prop_script == prop_value) == prop_fail_result)
@@ -4416,7 +4484,11 @@ for (;;)
         {
         for (i = min; i < max; i++)
           {
-          if (eptr >= md->end_subject) break;
+          if (eptr >= md->end_subject) 
+            {
+            SCHECK_PARTIAL(); 
+            break;
+            } 
           GETCHARINCTEST(c, eptr);
           prop_category = UCD_CATEGORY(c);
           if (prop_category == ucp_M) break;
@@ -4436,6 +4508,7 @@ for (;;)
         /* eptr is now past the end of the maximum run */
 
         if (possessive) continue;
+
         for(;;)
           {
           RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
@@ -4471,7 +4544,12 @@ for (;;)
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+              if (eptr >= md->end_subject) 
+                {
+                SCHECK_PARTIAL(); 
+                break;
+                } 
+              if (IS_NEWLINE(eptr)) break;
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -4483,7 +4561,12 @@ for (;;)
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+              if (eptr >= md->end_subject) 
+                {
+                SCHECK_PARTIAL(); 
+                break;
+                } 
+              if (IS_NEWLINE(eptr)) break;
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -4495,7 +4578,11 @@ for (;;)
             {
             for (i = min; i < max; i++)
               {
-              if (eptr >= md->end_subject) break;
+              if (eptr >= md->end_subject) 
+                {
+                SCHECK_PARTIAL(); 
+                break;
+                } 
               eptr++;
               while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
               }
@@ -4508,15 +4595,22 @@ for (;;)
           case OP_ANYBYTE:
           c = max - min;
           if (c > (unsigned int)(md->end_subject - eptr))
-            c = md->end_subject - eptr;
-          eptr += c;
+            {
+            eptr = md->end_subject;
+            SCHECK_PARTIAL();
+            }  
+          else eptr += c;
           break;
 
           case OP_ANYNL:
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c == 0x000d)
               {
@@ -4541,7 +4635,11 @@ for (;;)
             {
             BOOL gotspace;
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
@@ -4579,7 +4677,11 @@ for (;;)
             {
             BOOL gotspace;
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             switch(c)
               {
@@ -4603,7 +4705,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
             eptr+= len;
@@ -4614,7 +4720,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
             eptr+= len;
@@ -4625,7 +4735,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
             eptr+= len;
@@ -4636,7 +4750,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
             eptr+= len;
@@ -4647,7 +4765,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
             eptr+= len;
@@ -4658,7 +4780,11 @@ for (;;)
           for (i = min; i < max; i++)
             {
             int len = 1;
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             GETCHARLEN(c, eptr, len);
             if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
             eptr+= len;
@@ -4690,7 +4816,12 @@ for (;;)
           case OP_ANY:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL();
+              break;
+              } 
+            if (IS_NEWLINE(eptr)) break;
             eptr++;
             }
           break;
@@ -4699,14 +4830,21 @@ for (;;)
           case OP_ANYBYTE:
           c = max - min;
           if (c > (unsigned int)(md->end_subject - eptr))
-            c = md->end_subject - eptr;
-          eptr += c;
+            {
+            eptr = md->end_subject;
+            SCHECK_PARTIAL();
+            }    
+          else eptr += c;
           break;
 
           case OP_ANYNL:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c == 0x000d)
               {
@@ -4727,7 +4865,11 @@ for (;;)
           case OP_NOT_HSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c == 0x09 || c == 0x20 || c == 0xa0) break;
             eptr++;
@@ -4737,7 +4879,11 @@ for (;;)
           case OP_HSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c != 0x09 && c != 0x20 && c != 0xa0) break;
             eptr++;
@@ -4747,7 +4893,11 @@ for (;;)
           case OP_NOT_VSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
               break;
@@ -4758,7 +4908,11 @@ for (;;)
           case OP_VSPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject) break;
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
+              break;
+              } 
             c = *eptr;
             if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
               break;
@@ -4769,8 +4923,12 @@ for (;;)
           case OP_NOT_DIGIT:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
             eptr++;
             }
           break;
@@ -4778,8 +4936,12 @@ for (;;)
           case OP_DIGIT:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
             eptr++;
             }
           break;
@@ -4787,8 +4949,12 @@ for (;;)
           case OP_NOT_WHITESPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_space) != 0) break;
             eptr++;
             }
           break;
@@ -4796,8 +4962,12 @@ for (;;)
           case OP_WHITESPACE:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_space) == 0) break;
             eptr++;
             }
           break;
@@ -4805,8 +4975,12 @@ for (;;)
           case OP_NOT_WORDCHAR:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_word) != 0) break;
             eptr++;
             }
           break;
@@ -4814,8 +4988,12 @@ for (;;)
           case OP_WORDCHAR:
           for (i = min; i < max; i++)
             {
-            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+            if (eptr >= md->end_subject) 
+              {
+              SCHECK_PARTIAL(); 
               break;
+              } 
+            if ((md->ctypes[*eptr] & ctype_word) == 0) break;
             eptr++;
             }
           break;
diff --git a/pcre_internal.h b/pcre_internal.h
index a892af9..de09614 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1210,8 +1210,8 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
 OP_EOD must correspond in order to the list of escapes immediately above.
 
 *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
-that follow must also be updated to match. There is also a table called
-"coptable" in pcre_dfa_exec.c that must be updated. */
+that follow must also be updated to match. There are also tables called
+"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
 
 enum {
   OP_END,            /* 0 End of pattern */
@@ -1376,6 +1376,10 @@ enum {
   OP_SKIPZERO        /* 114 */
 };
 
+/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
+definitions that follow must also be updated to match. There are also tables
+called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
+
 
 /* This macro defines textual names for all the opcodes. These are used only
 for debugging. The macro is referenced only in pcre_printint.c. */
diff --git a/testdata/testinput2 b/testdata/testinput2
index 7f887c8..850242e 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3125,4 +3125,26 @@ a random value. /Ix
     ** Failers
     abcdde  
 
+/abcd*/
+    xxxxabcd\P
+    xxxxabcd\P\P
+
+/abcd*/i
+    xxxxabcd\P
+    xxxxabcd\P\P
+    XXXXABCD\P
+    XXXXABCD\P\P
+
+/abc\d*/
+    xxxxabc1\P
+    xxxxabc1\P\P
+
+/(a)bc\1*/
+    xxxxabca\P
+    xxxxabca\P\P
+
+/abc[de]*/
+    xxxxabcde\P
+    xxxxabcde\P\P
+
 /-- End of testinput2 --/
diff --git a/testdata/testinput5 b/testdata/testinput5
index c1a21ba..82818d7 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -720,4 +720,26 @@ can't tell the difference.) --/
     the cat\P
     the cat\P\P
 
+/abcd*/8
+    xxxxabcd\P
+    xxxxabcd\P\P
+
+/abcd*/i8
+    xxxxabcd\P
+    xxxxabcd\P\P
+    XXXXABCD\P
+    XXXXABCD\P\P
+
+/abc\d*/8
+    xxxxabc1\P
+    xxxxabc1\P\P
+
+/(a)bc\1*/8
+    xxxxabca\P
+    xxxxabca\P\P
+
+/abc[de]*/8
+    xxxxabcde\P
+    xxxxabcde\P\P
+
 /-- End of testinput5 --/
diff --git a/testdata/testinput7 b/testdata/testinput7
index f921835..dbc66bb 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -4507,4 +4507,22 @@
     thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
     \Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
 
+/abcd*/
+    xxxxabcd\P
+    xxxxabcd\P\P
+
+/abcd*/i
+    xxxxabcd\P
+    xxxxabcd\P\P
+    XXXXABCD\P
+    XXXXABCD\P\P
+
+/abc\d*/
+    xxxxabc1\P
+    xxxxabc1\P\P
+
+/abc[de]*/
+    xxxxabcde\P
+    xxxxabcde\P\P
+
 /-- End of testinput7 --/
diff --git a/testdata/testinput8 b/testdata/testinput8
index e879e81..1c6f684 100644
--- a/testdata/testinput8
+++ b/testdata/testinput8
@@ -667,4 +667,22 @@
 /X/8f<any> 
     A\x{1ec5}ABCXYZ
 
+/abcd*/8
+    xxxxabcd\P
+    xxxxabcd\P\P
+
+/abcd*/i8
+    xxxxabcd\P
+    xxxxabcd\P\P
+    XXXXABCD\P
+    XXXXABCD\P\P
+
+/abc\d*/8
+    xxxxabc1\P
+    xxxxabc1\P\P
+
+/abc[de]*/8
+    xxxxabcde\P
+    xxxxabcde\P\P
+
 /-- End of testinput8 --/ 
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 0d5b61b..646478e 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -10372,4 +10372,39 @@ No match
     abcdde  
 No match
 
+/abcd*/
+    xxxxabcd\P
+ 0: abcd
+    xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i
+    xxxxabcd\P
+ 0: abcd
+    xxxxabcd\P\P
+Partial match: abcd
+    XXXXABCD\P
+ 0: ABCD
+    XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/
+    xxxxabc1\P
+ 0: abc1
+    xxxxabc1\P\P
+Partial match: abc1
+
+/(a)bc\1*/
+    xxxxabca\P
+ 0: abca
+ 1: a
+    xxxxabca\P\P
+Partial match: abca
+
+/abc[de]*/
+    xxxxabcde\P
+ 0: abcde
+    xxxxabcde\P\P
+Partial match: abcde
+
 /-- End of testinput2 --/
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 1aaa5be..f5de747 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -2037,4 +2037,39 @@ Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123}
     the cat\P\P
 Partial match: the cat
 
+/abcd*/8
+    xxxxabcd\P
+ 0: abcd
+    xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i8
+    xxxxabcd\P
+ 0: abcd
+    xxxxabcd\P\P
+Partial match: abcd
+    XXXXABCD\P
+ 0: ABCD
+    XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/8
+    xxxxabc1\P
+ 0: abc1
+    xxxxabc1\P\P
+Partial match: abc1
+
+/(a)bc\1*/8
+    xxxxabca\P
+ 0: abca
+ 1: a
+    xxxxabca\P\P
+Partial match: abca
+
+/abc[de]*/8
+    xxxxabcde\P
+ 0: abcde
+    xxxxabcde\P\P
+Partial match: abcde
+
 /-- End of testinput5 --/
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 8cac766..524450e 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -7514,4 +7514,38 @@ No match
     \Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
 No match
 
+/abcd*/
+    xxxxabcd\P
+ 0: abcd
+ 1: abc
+    xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i
+    xxxxabcd\P
+ 0: abcd
+ 1: abc
+    xxxxabcd\P\P
+Partial match: abcd
+    XXXXABCD\P
+ 0: ABCD
+ 1: ABC
+    XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/
+    xxxxabc1\P
+ 0: abc1
+ 1: abc
+    xxxxabc1\P\P
+Partial match: abc1
+
+/abc[de]*/
+    xxxxabcde\P
+ 0: abcde
+ 1: abcd
+ 2: abc
+    xxxxabcde\P\P
+Partial match: abcde
+
 /-- End of testinput7 --/
diff --git a/testdata/testoutput8 b/testdata/testoutput8
index d991095..0cc87d7 100644
--- a/testdata/testoutput8
+++ b/testdata/testoutput8
@@ -1286,4 +1286,38 @@ No match
     A\x{1ec5}ABCXYZ
  0: X
 
+/abcd*/8
+    xxxxabcd\P
+ 0: abcd
+ 1: abc
+    xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i8
+    xxxxabcd\P
+ 0: abcd
+ 1: abc
+    xxxxabcd\P\P
+Partial match: abcd
+    XXXXABCD\P
+ 0: ABCD
+ 1: ABC
+    XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/8
+    xxxxabc1\P
+ 0: abc1
+ 1: abc
+    xxxxabc1\P\P
+Partial match: abc1
+
+/abc[de]*/8
+    xxxxabcde\P
+ 0: abcde
+ 1: abcd
+ 2: abc
+    xxxxabcde\P\P
+Partial match: abcde
+
 /-- End of testinput8 --/
author	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2009-10-17 19:55:02 +0000
committer	ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>	2009-10-17 19:55:02 +0000
commit	1da028459167ef408e659b9fe91eb70f3b79e395 (patch)
tree	2fbe158f25c2f1fc68cb1532b6dd7cee33a7ec94
parent	f66c8de115b662c90e2a0af9a4357f69df2b3106 (diff)
download	pcre-1da028459167ef408e659b9fe91eb70f3b79e395.tar.gz