summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2009-10-17 19:55:02 +0000
committerph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15>2009-10-17 19:55:02 +0000
commit1da028459167ef408e659b9fe91eb70f3b79e395 (patch)
tree2fbe158f25c2f1fc68cb1532b6dd7cee33a7ec94
parentf66c8de115b662c90e2a0af9a4357f69df2b3106 (diff)
downloadpcre-1da028459167ef408e659b9fe91eb70f3b79e395.tar.gz
Fix PCRE_PARTIAL_HARD for patterns that end optionally, e.g. abc*
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@462 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r--ChangeLog3
-rw-r--r--configure.ac4
-rw-r--r--pcre_dfa_exec.c91
-rw-r--r--pcre_exec.c270
-rw-r--r--pcre_internal.h8
-rw-r--r--testdata/testinput222
-rw-r--r--testdata/testinput522
-rw-r--r--testdata/testinput718
-rw-r--r--testdata/testinput818
-rw-r--r--testdata/testoutput235
-rw-r--r--testdata/testoutput535
-rw-r--r--testdata/testoutput734
-rw-r--r--testdata/testoutput834
13 files changed, 533 insertions, 61 deletions
diff --git a/ChangeLog b/ChangeLog
index 2885ebb..9e5d0a8 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -58,8 +58,7 @@ Version 8.00 05-Oct-09
10. Partial matching has been split into two forms: PCRE_PARTIAL_SOFT, which is
synonymous with PCRE_PARTIAL, for backwards compatibility, and
PCRE_PARTIAL_HARD, which causes a partial match to supersede a full match,
- and may be more useful for multi-segment matching, especially with
- pcre_exec().
+ and may be more useful for multi-segment matching.
11. Partial matching with pcre_exec() is now more intuitive. A partial match
used to be given if ever the end of the subject was reached; now it is
diff --git a/configure.ac b/configure.ac
index aed0d4c..2254be6 100644
--- a/configure.ac
+++ b/configure.ac
@@ -8,8 +8,8 @@ dnl empty.
m4_define(pcre_major, [8])
m4_define(pcre_minor, [00])
-m4_define(pcre_prerelease, [-RC1])
-m4_define(pcre_date, [2009-10-05])
+m4_define(pcre_prerelease, [-RC2])
+m4_define(pcre_date, [2009-10-17])
# Libtool shared library interface versions (current:revision:age)
m4_define(libpcre_version, [0:1:0])
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 93a885e..458bb4c 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -109,8 +109,9 @@ never stored, so we push them well clear of the normal opcodes. */
character that is to be tested in some way. This makes it possible to
centralize the loading of these characters. In the case of Type * etc, the
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
-small value. ***NOTE*** If the start of this table is modified, the two tables
-that follow must also be modified. */
+small value. Non-zero values in the table are the offsets from the opcode where
+the character is to be found. ***NOTE*** If the start of this table is
+modified, the three tables that follow must also be modified. */
static const uschar coptable[] = {
0, /* End */
@@ -160,7 +161,64 @@ static const uschar coptable[] = {
0, /* DEF */
0, 0, /* BRAZERO, BRAMINZERO */
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
- 0, 0, 0 /* FAIL, ACCEPT, SKIPZERO */
+ 0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */
+};
+
+/* This table identifies those opcodes that inspect a character. It is used to
+remember the fact that a character could have been inspected when the end of
+the subject is reached, in order to support PCRE_PARTIAL_HARD behaviour.
+***NOTE*** If the start of this table is modified, the two tables that follow
+must also be modified. */
+
+static const uschar poptable[] = {
+ 0, /* End */
+ 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */
+ 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */
+ 1, 1, 1, /* Any, AllAny, Anybyte */
+ 1, 1, 1, /* NOTPROP, PROP, EXTUNI */
+ 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */
+ 0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */
+ 1, /* Char */
+ 1, /* Charnc */
+ 1, /* not */
+ /* Positive single-char repeats */
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
+ 1, 1, 1, /* upto, minupto, exact */
+ 1, 1, 1, 1, /* *+, ++, ?+, upto+ */
+ /* Negative single-char repeats - only for chars < 256 */
+ 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */
+ 1, 1, 1, /* NOT upto, minupto, exact */
+ 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */
+ /* Positive type repeats */
+ 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */
+ 1, 1, 1, /* Type upto, minupto, exact */
+ 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */
+ /* Character class & ref repeats */
+ 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
+ 1, 1, /* CRRANGE, CRMINRANGE */
+ 1, /* CLASS */
+ 1, /* NCLASS */
+ 1, /* XCLASS - variable length */
+ 0, /* REF */
+ 0, /* RECURSE */
+ 0, /* CALLOUT */
+ 0, /* Alt */
+ 0, /* Ket */
+ 0, /* KetRmax */
+ 0, /* KetRmin */
+ 0, /* Assert */
+ 0, /* Assert not */
+ 0, /* Assert behind */
+ 0, /* Assert behind not */
+ 0, /* Reverse */
+ 0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */
+ 0, 0, 0, /* SBRA, SCBRA, SCOND */
+ 0, /* CREF */
+ 0, /* RREF */
+ 0, /* DEF */
+ 0, 0, /* BRAZERO, BRAMINZERO */
+ 0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */
+ 0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */
};
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
@@ -489,6 +547,7 @@ for (;;)
unsigned int c, d;
int forced_fail = 0;
int reached_end = 0;
+ BOOL could_continue = FALSE;
/* Make the new state list into the active state list and empty the
new state list. */
@@ -596,6 +655,12 @@ for (;;)
code = start_code + state_offset;
codevalue = *code;
+
+ /* If this opcode inspects a character, but we are at the end of the
+ subject, remember the fact so that we can support PCRE_PARTIAL_HARD. */
+
+ if (clen == 0 && poptable[codevalue] != 0)
+ could_continue = TRUE;
/* If this opcode is followed by an inline character, load it. It is
tempting to test for the presence of a subject character here, but that
@@ -2522,16 +2587,24 @@ for (;;)
/* We have finished the processing at the current subject character. If no
new states have been set for the next character, we have found all the
matches that we are going to find. If we are at the top level and partial
- matching has been requested, check for appropriate conditions. The "forced_
- fail" variable counts the number of (*F) encountered for the character. If it
- is equal to the original active_count (saved in workspace[1]) it means that
- (*F) was found on every active state. In this case we don't want to give a
- partial match. */
+ matching has been requested, check for appropriate conditions.
+
+ The "forced_fail" variable counts the number of (*F) encountered for the
+ character. If it is equal to the original active_count (saved in
+ workspace[1]) it means that (*F) was found on every active state. In this
+ case we don't want to give a partial match.
+
+ The "reached_end" variable counts the number of threads that have reached the
+ end of the pattern. The "could_continue" variable is true if a thread could
+ have continued but for the fact that the end of the subject was reached. */
if (new_count <= 0)
{
if (rlevel == 1 && /* Top level, and */
- reached_end != workspace[1] && /* Not all reached end */
+ ( /* either... */
+ reached_end != workspace[1] || /* Not all reached end */
+ could_continue /* or some could go on */
+ ) && /* and... */
forced_fail != workspace[1] && /* Not all forced fail & */
( /* either... */
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */
diff --git a/pcre_exec.c b/pcre_exec.c
index db1e926..f930095 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -415,7 +415,7 @@ the subject. */
}
#define SCHECK_PARTIAL()\
- if (md->partial && eptr > mstart)\
+ if (md->partial != 0 && eptr > mstart)\
{\
md->hitend = TRUE;\
if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\
@@ -2146,7 +2146,11 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (!match_ref(offset, eptr, length, md, ims)) break;
+ if (!match_ref(offset, eptr, length, md, ims))
+ {
+ CHECK_PARTIAL();
+ break;
+ }
eptr += length;
}
while (eptr >= pp)
@@ -2315,7 +2319,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c > 255)
{
@@ -2341,7 +2349,11 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if ((data[c/8] & (1 << (c&7))) == 0) break;
eptr++;
@@ -2446,7 +2458,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLENTEST(c, eptr, len);
if (!_pcre_xclass(c, data)) break;
eptr += len;
@@ -2685,7 +2701,11 @@ for (;;)
eptr <= md->end_subject - oclength &&
memcmp(eptr, occhars, oclength) == 0) eptr += oclength;
#endif /* SUPPORT_UCP */
- else break;
+ else
+ {
+ CHECK_PARTIAL();
+ break;
+ }
}
if (possessive) continue;
@@ -2763,7 +2783,12 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc != md->lcc[*eptr]) break;
eptr++;
}
@@ -2817,7 +2842,12 @@ for (;;)
pp = eptr;
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc != *eptr) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc != *eptr) break;
eptr++;
}
if (possessive) continue;
@@ -3029,7 +3059,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(d, eptr, len);
if (d < 256) d = md->lcc[d];
if (fc == d) break;
@@ -3050,7 +3084,12 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc == md->lcc[*eptr]) break;
eptr++;
}
if (possessive) continue;
@@ -3159,7 +3198,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(d, eptr, len);
if (fc == d) break;
eptr += len;
@@ -3179,7 +3222,12 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || fc == *eptr) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (fc == *eptr) break;
eptr++;
}
if (possessive) continue;
@@ -4335,7 +4383,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (prop_fail_result) break;
eptr+= len;
@@ -4346,7 +4398,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == ucp_Lu ||
@@ -4361,7 +4417,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_category = UCD_CATEGORY(c);
if ((prop_category == prop_value) == prop_fail_result)
@@ -4374,7 +4434,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_chartype = UCD_CHARTYPE(c);
if ((prop_chartype == prop_value) == prop_fail_result)
@@ -4387,7 +4451,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
prop_script = UCD_SCRIPT(c);
if ((prop_script == prop_value) == prop_fail_result)
@@ -4416,7 +4484,11 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARINCTEST(c, eptr);
prop_category = UCD_CATEGORY(c);
if (prop_category == ucp_M) break;
@@ -4436,6 +4508,7 @@ for (;;)
/* eptr is now past the end of the maximum run */
if (possessive) continue;
+
for(;;)
{
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45);
@@ -4471,7 +4544,12 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4483,7 +4561,12 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4495,7 +4578,11 @@ for (;;)
{
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
eptr++;
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++;
}
@@ -4508,15 +4595,22 @@ for (;;)
case OP_ANYBYTE:
c = max - min;
if (c > (unsigned int)(md->end_subject - eptr))
- c = md->end_subject - eptr;
- eptr += c;
+ {
+ eptr = md->end_subject;
+ SCHECK_PARTIAL();
+ }
+ else eptr += c;
break;
case OP_ANYNL:
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c == 0x000d)
{
@@ -4541,7 +4635,11 @@ for (;;)
{
BOOL gotspace;
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
switch(c)
{
@@ -4579,7 +4677,11 @@ for (;;)
{
BOOL gotspace;
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
switch(c)
{
@@ -4603,7 +4705,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break;
eptr+= len;
@@ -4614,7 +4720,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break;
eptr+= len;
@@ -4625,7 +4735,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break;
eptr+= len;
@@ -4636,7 +4750,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break;
eptr+= len;
@@ -4647,7 +4765,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break;
eptr+= len;
@@ -4658,7 +4780,11 @@ for (;;)
for (i = min; i < max; i++)
{
int len = 1;
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
GETCHARLEN(c, eptr, len);
if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break;
eptr+= len;
@@ -4690,7 +4816,12 @@ for (;;)
case OP_ANY:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
+ if (IS_NEWLINE(eptr)) break;
eptr++;
}
break;
@@ -4699,14 +4830,21 @@ for (;;)
case OP_ANYBYTE:
c = max - min;
if (c > (unsigned int)(md->end_subject - eptr))
- c = md->end_subject - eptr;
- eptr += c;
+ {
+ eptr = md->end_subject;
+ SCHECK_PARTIAL();
+ }
+ else eptr += c;
break;
case OP_ANYNL:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x000d)
{
@@ -4727,7 +4865,11 @@ for (;;)
case OP_NOT_HSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x09 || c == 0x20 || c == 0xa0) break;
eptr++;
@@ -4737,7 +4879,11 @@ for (;;)
case OP_HSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c != 0x09 && c != 0x20 && c != 0xa0) break;
eptr++;
@@ -4747,7 +4893,11 @@ for (;;)
case OP_NOT_VSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85)
break;
@@ -4758,7 +4908,11 @@ for (;;)
case OP_VSPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject) break;
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
+ break;
+ }
c = *eptr;
if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85)
break;
@@ -4769,8 +4923,12 @@ for (;;)
case OP_NOT_DIGIT:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_digit) != 0) break;
eptr++;
}
break;
@@ -4778,8 +4936,12 @@ for (;;)
case OP_DIGIT:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_digit) == 0) break;
eptr++;
}
break;
@@ -4787,8 +4949,12 @@ for (;;)
case OP_NOT_WHITESPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_space) != 0) break;
eptr++;
}
break;
@@ -4796,8 +4962,12 @@ for (;;)
case OP_WHITESPACE:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_space) == 0) break;
eptr++;
}
break;
@@ -4805,8 +4975,12 @@ for (;;)
case OP_NOT_WORDCHAR:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_word) != 0) break;
eptr++;
}
break;
@@ -4814,8 +4988,12 @@ for (;;)
case OP_WORDCHAR:
for (i = min; i < max; i++)
{
- if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
+ if (eptr >= md->end_subject)
+ {
+ SCHECK_PARTIAL();
break;
+ }
+ if ((md->ctypes[*eptr] & ctype_word) == 0) break;
eptr++;
}
break;
diff --git a/pcre_internal.h b/pcre_internal.h
index a892af9..de09614 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -1210,8 +1210,8 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
OP_EOD must correspond in order to the list of escapes immediately above.
*** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions
-that follow must also be updated to match. There is also a table called
-"coptable" in pcre_dfa_exec.c that must be updated. */
+that follow must also be updated to match. There are also tables called
+"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
enum {
OP_END, /* 0 End of pattern */
@@ -1376,6 +1376,10 @@ enum {
OP_SKIPZERO /* 114 */
};
+/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro
+definitions that follow must also be updated to match. There are also tables
+called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */
+
/* This macro defines textual names for all the opcodes. These are used only
for debugging. The macro is referenced only in pcre_printint.c. */
diff --git a/testdata/testinput2 b/testdata/testinput2
index 7f887c8..850242e 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3125,4 +3125,26 @@ a random value. /Ix
** Failers
abcdde
+/abcd*/
+ xxxxabcd\P
+ xxxxabcd\P\P
+
+/abcd*/i
+ xxxxabcd\P
+ xxxxabcd\P\P
+ XXXXABCD\P
+ XXXXABCD\P\P
+
+/abc\d*/
+ xxxxabc1\P
+ xxxxabc1\P\P
+
+/(a)bc\1*/
+ xxxxabca\P
+ xxxxabca\P\P
+
+/abc[de]*/
+ xxxxabcde\P
+ xxxxabcde\P\P
+
/-- End of testinput2 --/
diff --git a/testdata/testinput5 b/testdata/testinput5
index c1a21ba..82818d7 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -720,4 +720,26 @@ can't tell the difference.) --/
the cat\P
the cat\P\P
+/abcd*/8
+ xxxxabcd\P
+ xxxxabcd\P\P
+
+/abcd*/i8
+ xxxxabcd\P
+ xxxxabcd\P\P
+ XXXXABCD\P
+ XXXXABCD\P\P
+
+/abc\d*/8
+ xxxxabc1\P
+ xxxxabc1\P\P
+
+/(a)bc\1*/8
+ xxxxabca\P
+ xxxxabca\P\P
+
+/abc[de]*/8
+ xxxxabcde\P
+ xxxxabcde\P\P
+
/-- End of testinput5 --/
diff --git a/testdata/testinput7 b/testdata/testinput7
index f921835..dbc66bb 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -4507,4 +4507,22 @@
thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
\Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
+/abcd*/
+ xxxxabcd\P
+ xxxxabcd\P\P
+
+/abcd*/i
+ xxxxabcd\P
+ xxxxabcd\P\P
+ XXXXABCD\P
+ XXXXABCD\P\P
+
+/abc\d*/
+ xxxxabc1\P
+ xxxxabc1\P\P
+
+/abc[de]*/
+ xxxxabcde\P
+ xxxxabcde\P\P
+
/-- End of testinput7 --/
diff --git a/testdata/testinput8 b/testdata/testinput8
index e879e81..1c6f684 100644
--- a/testdata/testinput8
+++ b/testdata/testinput8
@@ -667,4 +667,22 @@
/X/8f<any>
A\x{1ec5}ABCXYZ
+/abcd*/8
+ xxxxabcd\P
+ xxxxabcd\P\P
+
+/abcd*/i8
+ xxxxabcd\P
+ xxxxabcd\P\P
+ XXXXABCD\P
+ XXXXABCD\P\P
+
+/abc\d*/8
+ xxxxabc1\P
+ xxxxabc1\P\P
+
+/abc[de]*/8
+ xxxxabcde\P
+ xxxxabcde\P\P
+
/-- End of testinput8 --/
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 0d5b61b..646478e 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -10372,4 +10372,39 @@ No match
abcdde
No match
+/abcd*/
+ xxxxabcd\P
+ 0: abcd
+ xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i
+ xxxxabcd\P
+ 0: abcd
+ xxxxabcd\P\P
+Partial match: abcd
+ XXXXABCD\P
+ 0: ABCD
+ XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/
+ xxxxabc1\P
+ 0: abc1
+ xxxxabc1\P\P
+Partial match: abc1
+
+/(a)bc\1*/
+ xxxxabca\P
+ 0: abca
+ 1: a
+ xxxxabca\P\P
+Partial match: abca
+
+/abc[de]*/
+ xxxxabcde\P
+ 0: abcde
+ xxxxabcde\P\P
+Partial match: abcde
+
/-- End of testinput2 --/
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index 1aaa5be..f5de747 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -2037,4 +2037,39 @@ Partial match: \x{123}X\x{123}\x{123}\x{123}\x{123}
the cat\P\P
Partial match: the cat
+/abcd*/8
+ xxxxabcd\P
+ 0: abcd
+ xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i8
+ xxxxabcd\P
+ 0: abcd
+ xxxxabcd\P\P
+Partial match: abcd
+ XXXXABCD\P
+ 0: ABCD
+ XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/8
+ xxxxabc1\P
+ 0: abc1
+ xxxxabc1\P\P
+Partial match: abc1
+
+/(a)bc\1*/8
+ xxxxabca\P
+ 0: abca
+ 1: a
+ xxxxabca\P\P
+Partial match: abca
+
+/abc[de]*/8
+ xxxxabcde\P
+ 0: abcde
+ xxxxabcde\P\P
+Partial match: abcde
+
/-- End of testinput5 --/
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index 8cac766..524450e 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -7514,4 +7514,38 @@ No match
\Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd
No match
+/abcd*/
+ xxxxabcd\P
+ 0: abcd
+ 1: abc
+ xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i
+ xxxxabcd\P
+ 0: abcd
+ 1: abc
+ xxxxabcd\P\P
+Partial match: abcd
+ XXXXABCD\P
+ 0: ABCD
+ 1: ABC
+ XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/
+ xxxxabc1\P
+ 0: abc1
+ 1: abc
+ xxxxabc1\P\P
+Partial match: abc1
+
+/abc[de]*/
+ xxxxabcde\P
+ 0: abcde
+ 1: abcd
+ 2: abc
+ xxxxabcde\P\P
+Partial match: abcde
+
/-- End of testinput7 --/
diff --git a/testdata/testoutput8 b/testdata/testoutput8
index d991095..0cc87d7 100644
--- a/testdata/testoutput8
+++ b/testdata/testoutput8
@@ -1286,4 +1286,38 @@ No match
A\x{1ec5}ABCXYZ
0: X
+/abcd*/8
+ xxxxabcd\P
+ 0: abcd
+ 1: abc
+ xxxxabcd\P\P
+Partial match: abcd
+
+/abcd*/i8
+ xxxxabcd\P
+ 0: abcd
+ 1: abc
+ xxxxabcd\P\P
+Partial match: abcd
+ XXXXABCD\P
+ 0: ABCD
+ 1: ABC
+ XXXXABCD\P\P
+Partial match: ABCD
+
+/abc\d*/8
+ xxxxabc1\P
+ 0: abc1
+ 1: abc
+ xxxxabc1\P\P
+Partial match: abc1
+
+/abc[de]*/8
+ xxxxabcde\P
+ 0: abcde
+ 1: abcd
+ 2: abc
+ xxxxabcde\P\P
+Partial match: abcde
+
/-- End of testinput8 --/