summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-10-14 13:54:07 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2013-10-14 13:54:07 +0000
commit 019360748cb83ed81a5fb1a68466c9b23e70f867 (patch)
tree a4b945f14e772cf539a4e6aad84822c30df8df49
parent 5bedf037b4d42e927e89cd4e5e7c789217a4df0d (diff)
download pcre-019360748cb83ed81a5fb1a68466c9b23e70f867.tar.gz
More auto-possessification additions, using possessive class repeats. These are
not yet used for explicit possessification.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@1379 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- pcre_compile.c 89
-rw-r--r-- pcre_dfa_exec.c 76
-rw-r--r-- pcre_exec.c 29
-rw-r--r-- pcre_internal.h 113
-rw-r--r-- pcre_jit_compile.c 103
-rw-r--r-- pcre_printint.c 5
-rw-r--r-- pcre_study.c 11
-rw-r--r-- testdata/saved16BE-1 bin 410 -> 410 bytes
-rw-r--r-- testdata/saved16BE-2 bin 344 -> 344 bytes
-rw-r--r-- testdata/saved16LE-1 bin 410 -> 410 bytes
-rw-r--r-- testdata/saved16LE-2 bin 344 -> 344 bytes
-rw-r--r-- testdata/saved32BE-1 bin 552 -> 552 bytes
-rw-r--r-- testdata/saved32BE-2 bin 456 -> 456 bytes
-rw-r--r-- testdata/saved32LE-1 bin 552 -> 552 bytes
-rw-r--r-- testdata/saved32LE-2 bin 456 -> 456 bytes
-rw-r--r-- testdata/testinput2 30
-rw-r--r-- testdata/testinput8 32
-rw-r--r-- testdata/testoutput14 2
-rw-r--r-- testdata/testoutput2 116
-rw-r--r-- testdata/testoutput8 118
-rw-r--r-- testdata/testoutput9 2
21 files changed, 524 insertions, 202 deletions
diff --git a/pcre_compile.c b/pcre_compile.c
index d56b7f8..44118ea 100644
--- a/pcre_compile.c
+++ b/pcre_compile.c
@@ -1817,16 +1817,20 @@ for (;;)
switch (*cc)
{
- case OP_CRPLUS:
- case OP_CRMINPLUS:
case OP_CRSTAR:
case OP_CRMINSTAR:
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
return -1;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
branchlength += (int)GET2(cc,1);
cc += 1 + 2 * IMM2_SIZE;
@@ -2419,15 +2423,19 @@ for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
break;
default: /* Non-repeat => class must match */
case OP_CRPLUS: /* These repeats aren't empty */
case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
return FALSE;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */
break;
}
@@ -2920,12 +2928,21 @@ switch(c)
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
list[1] = TRUE;
end++;
break;
+ case OP_CRPLUS:
+ case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
+ end++;
+ break;
+
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
list[1] = (GET2(end, 1) == 0);
end += 1 + 2 * IMM2_SIZE;
break;
@@ -2956,7 +2973,7 @@ Returns: TRUE if the auto-possessification is possible
static BOOL
compare_opcodes(const pcre_uchar *code, BOOL utf, const compile_data *cd,
- const pcre_uint32* base_list)
+ const pcre_uint32* base_list, const pcre_uchar *base_end)
{
pcre_uchar c;
pcre_uint32 list[8];
@@ -2964,6 +2981,7 @@ const pcre_uint32* chr_ptr;
const pcre_uint32* ochr_ptr;
const pcre_uint32* list_ptr;
const pcre_uchar *next_code;
+const pcre_uint8 *class_bits;
pcre_uint32 chr;
/* Note: the base_list[1] contains whether the current opcode has greedy
@@ -3039,7 +3057,7 @@ for(;;)
while (*next_code == OP_ALT)
{
- if (!compare_opcodes(code, utf, cd, base_list)) return FALSE;
+ if (!compare_opcodes(code, utf, cd, base_list, base_end)) return FALSE;
code = next_code + 1 + LINK_SIZE;
next_code += GET(next_code, 1);
}
@@ -3061,7 +3079,7 @@ for(;;)
/* The bracket content will be checked by the
OP_BRA/OP_CBRA case above. */
next_code += 1 + LINK_SIZE;
- if (!compare_opcodes(next_code, utf, cd, base_list)) return FALSE;
+ if (!compare_opcodes(next_code, utf, cd, base_list, base_end)) return FALSE;
code += PRIV(OP_lengths)[c];
continue;
@@ -3318,21 +3336,14 @@ for(;;)
return FALSE;
break;
- /* The class comparisons work only when the class is the second item
- of the pair, because there are at present no possessive forms of the
- class opcodes. Note also that the "code" variable that is used below
- points after the second item, and that the pointer for the first item
- is not available, so even if there were possessive forms of the class
- opcodes, the correct comparison could not be done. */
-
case OP_NCLASS:
if (chr > 255) return FALSE;
/* Fall through */
case OP_CLASS:
- if (list_ptr != list) return FALSE; /* Class is first opcode */
if (chr > 255) break;
- if ((((pcre_uint8 *)(code - list_ptr[2]))[chr >> 3] & (1 << (chr & 7))) != 0)
+ class_bits = (pcre_uint8 *)((list_ptr == list ? code : base_end) - list_ptr[2]);
+ if ((class_bits[chr >> 3] & (1 << (chr & 7))) != 0)
return FALSE;
break;
@@ -3380,14 +3391,15 @@ Returns: nothing
static void
auto_possessify(pcre_uchar *code, BOOL utf, const compile_data *cd)
{
-register pcre_uchar c;
+register pcre_uchar c, d;
const pcre_uchar *end;
+pcre_uchar *repeat_code;
pcre_uint32 list[8];
for (;;)
{
c = *code;
-
+
if (c >= OP_STAR && c <= OP_TYPEPOSUPTO)
{
c -= get_repeat_base(c) - OP_STAR;
@@ -3395,7 +3407,7 @@ for (;;)
get_chr_property_list(code, utf, cd->fcc, list) : NULL;
list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
- if (end != NULL && compare_opcodes(end, utf, cd, list))
+ if (end != NULL && compare_opcodes(end, utf, cd, list, end))
{
switch(c)
{
@@ -3434,6 +3446,47 @@ for (;;)
}
c = *code;
}
+ else if (c == OP_CLASS || c == OP_NCLASS || c == OP_XCLASS)
+ {
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
+ if (c == OP_XCLASS)
+ repeat_code = code + 1 + GET(code, 1);
+ else
+#endif
+ repeat_code = code + 1 + (32 / sizeof(pcre_uchar));
+
+ d = *repeat_code;
+ if (d >= OP_CRSTAR && d <= OP_CRMINRANGE)
+ {
+ /* end must not be NULL. */
+ end = get_chr_property_list(code, utf, cd->fcc, list);
+
+ list[1] = d == OP_CRSTAR || d == OP_CRPLUS || d == OP_CRQUERY ||
+ d == OP_CRRANGE;
+
+ if (compare_opcodes(end, utf, cd, list, end))
+ {
+ switch (d)
+ {
+ case OP_CRSTAR:
+ *repeat_code = OP_CRPOSSTAR;
+ break;
+
+ case OP_CRPLUS:
+ *repeat_code = OP_CRPOSPLUS;
+ break;
+
+ case OP_CRQUERY:
+ *repeat_code = OP_CRPOSQUERY;
+ break;
+
+ case OP_CRRANGE:
+ *repeat_code = OP_CRPOSRANGE;
+ break;
+ }
+ }
+ }
+ }
switch(c)
{
@@ -3460,9 +3513,11 @@ for (;;)
code += 2;
break;
+#if defined SUPPORT_UTF || !defined COMPILE_PCRE8
case OP_XCLASS:
code += GET(code, 1);
break;
+#endif
case OP_MARK:
case OP_PRUNE_ARG:
diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c
index 19fba5b..216a515 100644
--- a/pcre_dfa_exec.c
+++ b/pcre_dfa_exec.c
@@ -151,6 +151,7 @@ static const pcre_uint8 coptable[] = {
/* Character class & ref repeats */
0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */
0, 0, /* CRRANGE, CRMINRANGE */
+ 0, 0, 0, 0, /* Possessive *+, ++, ?+, CRPOSRANGE */
0, /* CLASS */
0, /* NCLASS */
0, /* XCLASS - variable length */
@@ -222,6 +223,7 @@ static const pcre_uint8 poptable[] = {
/* Character class & ref repeats */
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */
1, 1, /* CRRANGE, CRMINRANGE */
+ 1, 1, 1, 1, /* Possessive *+, ++, ?+, CRPOSRANGE */
1, /* CLASS */
1, /* NCLASS */
1, /* XCLASS - variable length */
@@ -1101,7 +1103,7 @@ for (;;)
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
-
+
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
switch(c)
@@ -1110,11 +1112,11 @@ for (;;)
VSPACE_CASES:
OK = TRUE;
break;
-
- default:
+
+ default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
- }
+ }
break;
case PT_WORD:
@@ -1359,7 +1361,7 @@ for (;;)
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
-
+
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
switch(c)
@@ -1368,11 +1370,11 @@ for (;;)
VSPACE_CASES:
OK = TRUE;
break;
-
- default:
+
+ default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
- }
+ }
break;
case PT_WORD:
@@ -1611,7 +1613,7 @@ for (;;)
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
-
+
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
switch(c)
@@ -1620,11 +1622,11 @@ for (;;)
VSPACE_CASES:
OK = TRUE;
break;
-
- default:
+
+ default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
- }
+ }
break;
case PT_WORD:
@@ -1888,7 +1890,7 @@ for (;;)
/* Perl space used to exclude VT, but from Perl 5.18 it is included,
which means that Perl space and POSIX space are now identical. PCRE
was changed at release 8.34. */
-
+
case PT_SPACE: /* Perl space */
case PT_PXSPACE: /* POSIX space */
switch(c)
@@ -1897,11 +1899,11 @@ for (;;)
VSPACE_CASES:
OK = TRUE;
break;
-
- default:
+
+ default:
OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
break;
- }
+ }
break;
case PT_WORD:
@@ -2569,31 +2571,65 @@ for (;;)
{
case OP_CRSTAR:
case OP_CRMINSTAR:
+ case OP_CRPOSSTAR:
ADD_ACTIVE(next_state_offset + 1, 0);
- if (isinclass) { ADD_NEW(state_offset, 0); }
+ if (isinclass)
+ {
+ if (*ecode == OP_CRPOSSTAR)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(state_offset, 0);
+ }
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
count = current_state->count; /* Already matched */
if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); }
- if (isinclass) { count++; ADD_NEW(state_offset, count); }
+ if (isinclass)
+ {
+ if (count > 0 && *ecode == OP_CRPOSPLUS)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ count++;
+ ADD_NEW(state_offset, count);
+ }
break;
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSQUERY:
ADD_ACTIVE(next_state_offset + 1, 0);
- if (isinclass) { ADD_NEW(next_state_offset + 1, 0); }
+ if (isinclass)
+ {
+ if (*ecode == OP_CRPOSQUERY)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
+ ADD_NEW(next_state_offset + 1, 0);
+ }
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
count = current_state->count; /* Already matched */
if (count >= (int)GET2(ecode, 1))
{ ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
if (isinclass)
{
int max = (int)GET2(ecode, 1 + IMM2_SIZE);
+ if (*ecode == OP_CRPOSRANGE)
+ {
+ active_count--; /* Remove non-match possibility */
+ next_active_state--;
+ }
if (++count >= max && max != 0) /* Max 0 => no limit */
{ ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
else
@@ -2696,7 +2732,7 @@ for (;;)
/* Back reference conditions and duplicate named recursion conditions
are not supported */
- if (condcode == OP_CREF || condcode == OP_DNCREF ||
+ if (condcode == OP_CREF || condcode == OP_DNCREF ||
condcode == OP_DNRREF)
return PCRE_ERROR_DFA_UCOND;
diff --git a/pcre_exec.c b/pcre_exec.c
index 7311aac..2470ee9 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -107,8 +107,8 @@ because the offset vector is always a multiple of 3 long. */
/* Min and max values for the common repeats; for the maxima, 0 => infinity */
-static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
-static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
+static const char rep_min[] = { 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, };
+static const char rep_max[] = { 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, };
#ifdef PCRE_DEBUG
/*************************************************
@@ -2864,8 +2864,12 @@ for (;;)
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
c = *ecode++ - OP_CRSTAR;
- minimize = (c & 1) != 0;
+ if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
+ else possessive = TRUE;
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
@@ -2873,7 +2877,9 @@ for (;;)
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
minimize = (*ecode == OP_CRMINRANGE);
+ possessive = (*ecode == OP_CRPOSRANGE);
min = GET2(ecode, 1);
max = GET2(ecode, 1 + IMM2_SIZE);
if (max == 0) max = INT_MAX;
@@ -3015,6 +3021,9 @@ for (;;)
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
eptr += len;
}
+
+ if (possessive) continue; /* No backtracking */
+
for (;;)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM18);
@@ -3045,6 +3054,9 @@ for (;;)
if ((BYTE_MAP[c/8] & (1 << (c&7))) == 0) break;
eptr++;
}
+
+ if (possessive) continue; /* No backtracking */
+
while (eptr >= pp)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM19);
@@ -3078,8 +3090,12 @@ for (;;)
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
c = *ecode++ - OP_CRSTAR;
- minimize = (c & 1) != 0;
+ if (c < OP_CRPOSSTAR - OP_CRSTAR) minimize = (c & 1) != 0;
+ else possessive = TRUE;
min = rep_min[c]; /* Pick up values from tables; */
max = rep_max[c]; /* zero for max => infinity */
if (max == 0) max = INT_MAX;
@@ -3087,7 +3103,9 @@ for (;;)
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
minimize = (*ecode == OP_CRMINRANGE);
+ possessive = (*ecode == OP_CRPOSRANGE);
min = GET2(ecode, 1);
max = GET2(ecode, 1 + IMM2_SIZE);
if (max == 0) max = INT_MAX;
@@ -3159,6 +3177,9 @@ for (;;)
if (!PRIV(xclass)(c, data, utf)) break;
eptr += len;
}
+
+ if (possessive) continue; /* No backtracking */
+
for(;;)
{
RMATCH(eptr, ecode, offset_top, md, eptrb, RM21);
diff --git a/pcre_internal.h b/pcre_internal.h
index 124d28c..c483e4c 100644
--- a/pcre_internal.h
+++ b/pcre_internal.h
@@ -2070,91 +2070,96 @@ enum {
OP_CRRANGE, /* 104 These are different to the three sets above. */
OP_CRMINRANGE, /* 105 */
+ OP_CRPOSSTAR, /* 106 Possessified versions */
+ OP_CRPOSPLUS, /* 107 */
+ OP_CRPOSQUERY, /* 108 */
+ OP_CRPOSRANGE, /* 109 */
+
/* End of quantifier opcodes */
- OP_CLASS, /* 106 Match a character class, chars < 256 only */
- OP_NCLASS, /* 107 Same, but the bitmap was created from a negative
+ OP_CLASS, /* 110 Match a character class, chars < 256 only */
+ OP_NCLASS, /* 111 Same, but the bitmap was created from a negative
class - the difference is relevant only when a
character > 255 is encountered. */
- OP_XCLASS, /* 108 Extended class for handling > 255 chars within the
+ OP_XCLASS, /* 112 Extended class for handling > 255 chars within the
class. This does both positive and negative. */
- OP_REF, /* 109 Match a back reference, casefully */
- OP_REFI, /* 110 Match a back reference, caselessly */
- OP_DNREF, /* 111 Match a duplicate name backref, casefully */
- OP_DNREFI, /* 112 Match a duplicate name backref, caselessly */
- OP_RECURSE, /* 113 Match a numbered subpattern (possibly recursive) */
- OP_CALLOUT, /* 114 Call out to external function if provided */
-
- OP_ALT, /* 115 Start of alternation */
- OP_KET, /* 116 End of group that doesn't have an unbounded repeat */
- OP_KETRMAX, /* 117 These two must remain together and in this */
- OP_KETRMIN, /* 118 order. They are for groups the repeat for ever. */
- OP_KETRPOS, /* 119 Possessive unlimited repeat. */
+ OP_REF, /* 113 Match a back reference, casefully */
+ OP_REFI, /* 114 Match a back reference, caselessly */
+ OP_DNREF, /* 115 Match a duplicate name backref, casefully */
+ OP_DNREFI, /* 116 Match a duplicate name backref, caselessly */
+ OP_RECURSE, /* 117 Match a numbered subpattern (possibly recursive) */
+ OP_CALLOUT, /* 118 Call out to external function if provided */
+
+ OP_ALT, /* 119 Start of alternation */
+ OP_KET, /* 120 End of group that doesn't have an unbounded repeat */
+ OP_KETRMAX, /* 121 These two must remain together and in this */
+ OP_KETRMIN, /* 122 order. They are for groups the repeat for ever. */
+ OP_KETRPOS, /* 123 Possessive unlimited repeat. */
/* The assertions must come before BRA, CBRA, ONCE, and COND, and the four
asserts must remain in order. */
- OP_REVERSE, /* 129 Move pointer back - used in lookbehind assertions */
- OP_ASSERT, /* 121 Positive lookahead */
- OP_ASSERT_NOT, /* 122 Negative lookahead */
- OP_ASSERTBACK, /* 123 Positive lookbehind */
- OP_ASSERTBACK_NOT, /* 124 Negative lookbehind */
+ OP_REVERSE, /* 124 Move pointer back - used in lookbehind assertions */
+ OP_ASSERT, /* 125 Positive lookahead */
+ OP_ASSERT_NOT, /* 126 Negative lookahead */
+ OP_ASSERTBACK, /* 127 Positive lookbehind */
+ OP_ASSERTBACK_NOT, /* 128 Negative lookbehind */
/* ONCE, ONCE_NC, BRA, BRAPOS, CBRA, CBRAPOS, and COND must come immediately
after the assertions, with ONCE first, as there's a test for >= ONCE for a
subpattern that isn't an assertion. The POS versions must immediately follow
the non-POS versions in each case. */
- OP_ONCE, /* 125 Atomic group, contains captures */
- OP_ONCE_NC, /* 126 Atomic group containing no captures */
- OP_BRA, /* 127 Start of non-capturing bracket */
- OP_BRAPOS, /* 128 Ditto, with unlimited, possessive repeat */
- OP_CBRA, /* 129 Start of capturing bracket */
- OP_CBRAPOS, /* 130 Ditto, with unlimited, possessive repeat */
- OP_COND, /* 131 Conditional group */
+ OP_ONCE, /* 129 Atomic group, contains captures */
+ OP_ONCE_NC, /* 130 Atomic group containing no captures */
+ OP_BRA, /* 131 Start of non-capturing bracket */
+ OP_BRAPOS, /* 132 Ditto, with unlimited, possessive repeat */
+ OP_CBRA, /* 133 Start of capturing bracket */
+ OP_CBRAPOS, /* 134 Ditto, with unlimited, possessive repeat */
+ OP_COND, /* 135 Conditional group */
/* These five must follow the previous five, in the same order. There's a
check for >= SBRA to distinguish the two sets. */
- OP_SBRA, /* 132 Start of non-capturing bracket, check empty */
- OP_SBRAPOS, /* 133 Ditto, with unlimited, possessive repeat */
- OP_SCBRA, /* 134 Start of capturing bracket, check empty */
- OP_SCBRAPOS, /* 135 Ditto, with unlimited, possessive repeat */
- OP_SCOND, /* 136 Conditional group, check empty */
+ OP_SBRA, /* 136 Start of non-capturing bracket, check empty */
+ OP_SBRAPOS, /* 137 Ditto, with unlimited, possessive repeat */
+ OP_SCBRA, /* 138 Start of capturing bracket, check empty */
+ OP_SCBRAPOS, /* 139 Ditto, with unlimited, possessive repeat */
+ OP_SCOND, /* 140 Conditional group, check empty */
/* The next two pairs must (respectively) be kept together. */
- OP_CREF, /* 137 Used to hold a capture number as condition */
- OP_DNCREF, /* 138 Used to point to duplicate names as a condition */
- OP_RREF, /* 139 Used to hold a recursion number as condition */
- OP_DNRREF, /* 140 Used to point to duplicate names as a condition */
- OP_DEF, /* 141 The DEFINE condition */
+ OP_CREF, /* 141 Used to hold a capture number as condition */
+ OP_DNCREF, /* 142 Used to point to duplicate names as a condition */
+ OP_RREF, /* 143 Used to hold a recursion number as condition */
+ OP_DNRREF, /* 144 Used to point to duplicate names as a condition */
+ OP_DEF, /* 145 The DEFINE condition */
- OP_BRAZERO, /* 142 These two must remain together and in this */
- OP_BRAMINZERO, /* 143 order. */
- OP_BRAPOSZERO, /* 144 */
+ OP_BRAZERO, /* 146 These two must remain together and in this */
+ OP_BRAMINZERO, /* 147 order. */
+ OP_BRAPOSZERO, /* 148 */
/* These are backtracking control verbs */
- OP_MARK, /* 145 always has an argument */
- OP_PRUNE, /* 146 */
- OP_PRUNE_ARG, /* 147 same, but with argument */
- OP_SKIP, /* 148 */
- OP_SKIP_ARG, /* 149 same, but with argument */
- OP_THEN, /* 150 */
- OP_THEN_ARG, /* 151 same, but with argument */
- OP_COMMIT, /* 152 */
+ OP_MARK, /* 149 always has an argument */
+ OP_PRUNE, /* 150 */
+ OP_PRUNE_ARG, /* 151 same, but with argument */
+ OP_SKIP, /* 152 */
+ OP_SKIP_ARG, /* 153 same, but with argument */
+ OP_THEN, /* 154 */
+ OP_THEN_ARG, /* 155 same, but with argument */
+ OP_COMMIT, /* 156 */
/* These are forced failure and success verbs */
- OP_FAIL, /* 153 */
- OP_ACCEPT, /* 154 */
- OP_ASSERT_ACCEPT, /* 155 Used inside assertions */
- OP_CLOSE, /* 156 Used before OP_ACCEPT to close open captures */
+ OP_FAIL, /* 157 */
+ OP_ACCEPT, /* 158 */
+ OP_ASSERT_ACCEPT, /* 159 Used inside assertions */
+ OP_CLOSE, /* 160 Used before OP_ACCEPT to close open captures */
/* This is used to skip a subpattern with a {0} quantifier */
- OP_SKIPZERO, /* 157 */
+ OP_SKIPZERO, /* 161 */
/* This is not an opcode, but is used to check that tables indexed by opcode
are the correct length, in order to catch updating errors - there have been
@@ -2194,6 +2199,7 @@ some cases doesn't actually use these names at all). */
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", \
"*+","++", "?+", "{", \
"*", "*?", "+", "+?", "?", "??", "{", "{", \
+ "*+","++", "?+", "{", \
"class", "nclass", "xclass", "Ref", "Refi", "DnRef", "DnRefi", \
"Recurse", "Callout", \
"Alt", "Ket", "KetRmax", "KetRmin", "KetRpos", \
@@ -2259,6 +2265,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */
/* Character class & ref repeats */ \
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ \
1+2*IMM2_SIZE, 1+2*IMM2_SIZE, /* CRRANGE, CRMINRANGE */ \
+ 1, 1, 1, 1+2*IMM2_SIZE, /* Possessive *+, ++, ?+, CRPOSRANGE */ \
1+(32/sizeof(pcre_uchar)), /* CLASS */ \
1+(32/sizeof(pcre_uchar)), /* NCLASS */ \
0, /* XCLASS - variable length */ \
diff --git a/pcre_jit_compile.c b/pcre_jit_compile.c
index a8a3426..c62ef0d 100644
--- a/pcre_jit_compile.c
+++ b/pcre_jit_compile.c
@@ -585,6 +585,10 @@ switch(*cc)
case OP_CRMINQUERY:
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
+ case OP_CRPOSRANGE:
case OP_CLASS:
case OP_NCLASS:
case OP_REF:
@@ -6919,7 +6923,7 @@ count_match(common);
return cc + 1 + LINK_SIZE;
}
-static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *arg1, int *arg2, pcre_uchar **end)
+static SLJIT_INLINE pcre_uchar *get_iterator_parameters(compiler_common *common, pcre_uchar *cc, pcre_uchar *opcode, pcre_uchar *type, int *max, int *min, pcre_uchar **end)
{
int class_len;
@@ -6955,7 +6959,7 @@ else if (*opcode >= OP_TYPESTAR && *opcode <= OP_TYPEPOSUPTO)
}
else
{
- SLJIT_ASSERT(*opcode >= OP_CLASS || *opcode <= OP_XCLASS);
+ SLJIT_ASSERT(*opcode == OP_CLASS || *opcode == OP_NCLASS || *opcode == OP_XCLASS);
*type = *opcode;
cc++;
class_len = (*type < OP_XCLASS) ? (int)(1 + (32 / sizeof(pcre_uchar))) : GET(cc, 0);
@@ -6966,18 +6970,24 @@ else
if (end != NULL)
*end = cc + class_len;
}
+ else if (*opcode >= OP_CRPOSSTAR && *opcode <= OP_CRPOSQUERY)
+ {
+ *opcode -= OP_CRPOSSTAR - OP_POSSTAR;
+ if (end != NULL)
+ *end = cc + class_len;
+ }
else
{
- SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE);
- *arg1 = GET2(cc, (class_len + IMM2_SIZE));
- *arg2 = GET2(cc, class_len);
+ SLJIT_ASSERT(*opcode == OP_CRRANGE || *opcode == OP_CRMINRANGE || *opcode == OP_CRPOSRANGE);
+ *max = GET2(cc, (class_len + IMM2_SIZE));
+ *min = GET2(cc, class_len);
- if (*arg2 == 0)
+ if (*min == 0)
{
- SLJIT_ASSERT(*arg1 != 0);
- *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : OP_MINUPTO;
+ SLJIT_ASSERT(*max != 0);
+ *opcode = (*opcode == OP_CRRANGE) ? OP_UPTO : (*opcode == OP_CRMINRANGE ? OP_MINUPTO : OP_POSUPTO);
}
- if (*arg1 == *arg2)
+ if (*max == *min)
*opcode = OP_EXACT;
if (end != NULL)
@@ -6988,7 +6998,7 @@ else
if (*opcode == OP_UPTO || *opcode == OP_MINUPTO || *opcode == OP_EXACT || *opcode == OP_POSUPTO)
{
- *arg1 = GET2(cc, 0);
+ *max = GET2(cc, 0);
cc += IMM2_SIZE;
}
@@ -7017,7 +7027,7 @@ DEFINE_COMPILER;
backtrack_common *backtrack;
pcre_uchar opcode;
pcre_uchar type;
-int arg1 = -1, arg2 = -1;
+int max = -1, min = -1;
pcre_uchar* end;
jump_list *nomatch = NULL;
struct sljit_jump *jump = NULL;
@@ -7030,7 +7040,7 @@ int tmp_base, tmp_offset;
PUSH_BACKTRACK(sizeof(iterator_backtrack), cc, NULL);
-cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, &end);
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, &end);
switch(type)
{
@@ -7101,10 +7111,10 @@ switch(opcode)
{
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
- if (opcode == OP_CRRANGE && arg2 > 0)
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2, label);
- if (opcode == OP_UPTO || (opcode == OP_CRRANGE && arg1 > 0))
- jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, arg1);
+ if (opcode == OP_CRRANGE && min > 0)
+ CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min, label);
+ if (opcode == OP_UPTO || (opcode == OP_CRRANGE && max > 0))
+ jump = CMP(SLJIT_C_GREATER_EQUAL, TMP1, 0, SLJIT_IMM, max);
OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, TMP1, 0);
}
@@ -7131,7 +7141,7 @@ switch(opcode)
OP1(SLJIT_MOV, base, offset0, STR_PTR, 0);
if (opcode <= OP_PLUS)
JUMPTO(SLJIT_JUMP, label);
- else if (opcode == OP_CRRANGE && arg1 == 0)
+ else if (opcode == OP_CRRANGE && max == 0)
{
OP2(SLJIT_ADD, base, offset1, base, offset1, SLJIT_IMM, 1);
JUMPTO(SLJIT_JUMP, label);
@@ -7141,11 +7151,11 @@ switch(opcode)
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP2(SLJIT_ADD, TMP1, 0, TMP1, 0, SLJIT_IMM, 1);
OP1(SLJIT_MOV, base, offset1, TMP1, 0);
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 1, label);
+ CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 1, label);
}
set_jumps(nomatch, LABEL());
if (opcode == OP_CRRANGE)
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, arg2 + 1));
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_C_LESS, base, offset1, SLJIT_IMM, min + 1));
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
}
BACKTRACK_AS(iterator_backtrack)->matchingpath = LABEL();
@@ -7183,7 +7193,7 @@ switch(opcode)
break;
case OP_EXACT:
- OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, arg1);
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, max);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
@@ -7196,7 +7206,7 @@ switch(opcode)
if (opcode == OP_POSPLUS)
compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
if (opcode == OP_POSUPTO)
- OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, arg1);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max);
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
label = LABEL();
compile_char1_matchingpath(common, type, cc, &nomatch);
@@ -7220,6 +7230,34 @@ switch(opcode)
OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
break;
+ case OP_CRPOSRANGE:
+ /* Combination of OP_EXACT and OP_POSSTAR or OP_POSUPTO */
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, SLJIT_IMM, min);
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &backtrack->topbacktracks);
+ OP2(SLJIT_SUB | SLJIT_SET_E, tmp_base, tmp_offset, tmp_base, tmp_offset, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_C_NOT_ZERO, label);
+
+ if (max != 0)
+ {
+ SLJIT_ASSERT(max - min > 0);
+ OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, max - min);
+ }
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+ label = LABEL();
+ compile_char1_matchingpath(common, type, cc, &nomatch);
+ OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+ if (max == 0)
+ JUMPTO(SLJIT_JUMP, label);
+ else
+ {
+ OP2(SLJIT_SUB | SLJIT_SET_E, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, SLJIT_IMM, 1);
+ JUMPTO(SLJIT_C_NOT_ZERO, label);
+ }
+ set_jumps(nomatch, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, tmp_base, tmp_offset);
+ break;
+
default:
SLJIT_ASSERT_STOP();
break;
@@ -7497,7 +7535,7 @@ while (cc < ccend)
case OP_CLASS:
case OP_NCLASS:
- if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRMINRANGE)
+ if (cc[1 + (32 / sizeof(pcre_uchar))] >= OP_CRSTAR && cc[1 + (32 / sizeof(pcre_uchar))] <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
@@ -7505,7 +7543,7 @@ while (cc < ccend)
#if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
case OP_XCLASS:
- if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRMINRANGE)
+ if (*(cc + GET(cc, 1)) >= OP_CRSTAR && *(cc + GET(cc, 1)) <= OP_CRPOSRANGE)
cc = compile_iterator_matchingpath(common, cc, parent);
else
cc = compile_char1_matchingpath(common, *cc, cc + 1, parent->top != NULL ? &parent->top->nextbacktracks : &parent->topbacktracks);
@@ -7514,7 +7552,7 @@ while (cc < ccend)
case OP_REF:
case OP_REFI:
- if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRMINRANGE)
+ if (cc[1 + IMM2_SIZE] >= OP_CRSTAR && cc[1 + IMM2_SIZE] <= OP_CRPOSRANGE)
cc = compile_ref_iterator_matchingpath(common, cc, parent);
else
{
@@ -7525,7 +7563,7 @@ while (cc < ccend)
case OP_DNREF:
case OP_DNREFI:
- if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRMINRANGE)
+ if (cc[1 + 2 * IMM2_SIZE] >= OP_CRSTAR && cc[1 + 2 * IMM2_SIZE] <= OP_CRPOSRANGE)
cc = compile_ref_iterator_matchingpath(common, cc, parent);
else
{
@@ -7685,7 +7723,7 @@ DEFINE_COMPILER;
pcre_uchar *cc = current->cc;
pcre_uchar opcode;
pcre_uchar type;
-int arg1 = -1, arg2 = -1;
+int max = -1, min = -1;
struct sljit_label *label = NULL;
struct sljit_jump *jump = NULL;
jump_list *jumplist = NULL;
@@ -7694,7 +7732,7 @@ int base = (private_data_ptr == 0) ? SLJIT_MEM1(STACK_TOP) : SLJIT_MEM1(SLJIT_LO
int offset0 = (private_data_ptr == 0) ? STACK(0) : private_data_ptr;
int offset1 = (private_data_ptr == 0) ? STACK(1) : private_data_ptr + (int)sizeof(sljit_sw);
-cc = get_iterator_parameters(common, cc, &opcode, &type, &arg1, &arg2, NULL);
+cc = get_iterator_parameters(common, cc, &opcode, &type, &max, &min, NULL);
switch(opcode)
{
@@ -7713,7 +7751,7 @@ switch(opcode)
else
{
if (opcode == OP_UPTO)
- arg2 = 0;
+ min = 0;
if (opcode <= OP_PLUS)
{
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
@@ -7723,7 +7761,7 @@ switch(opcode)
{
OP1(SLJIT_MOV, TMP1, 0, base, offset1);
OP1(SLJIT_MOV, STR_PTR, 0, base, offset0);
- jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, arg2 + 1);
+ jump = CMP(SLJIT_C_LESS_EQUAL, TMP1, 0, SLJIT_IMM, min + 1);
OP2(SLJIT_SUB, base, offset1, TMP1, 0, SLJIT_IMM, 1);
}
skip_char_back(common);
@@ -7768,12 +7806,12 @@ switch(opcode)
OP1(SLJIT_MOV, base, offset1, TMP1, 0);
if (opcode == OP_CRMINRANGE)
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg2 + 1, label);
+ CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, min + 1, label);
- if (opcode == OP_CRMINRANGE && arg1 == 0)
+ if (opcode == OP_CRMINRANGE && max == 0)
JUMPTO(SLJIT_JUMP, CURRENT_AS(iterator_backtrack)->matchingpath);
else
- CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, arg1 + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
+ CMPTO(SLJIT_C_LESS, TMP1, 0, SLJIT_IMM, max + 2, CURRENT_AS(iterator_backtrack)->matchingpath);
set_jumps(jumplist, LABEL());
if (private_data_ptr == 0)
@@ -7808,6 +7846,7 @@ switch(opcode)
case OP_EXACT:
case OP_POSPLUS:
+ case OP_CRPOSRANGE:
set_jumps(current->topbacktracks, LABEL());
break;
diff --git a/pcre_printint.c b/pcre_printint.c
index 5e6a1ae..65b8e7d 100644
--- a/pcre_printint.c
+++ b/pcre_printint.c
@@ -735,17 +735,22 @@ for(;;)
case OP_CRMINPLUS:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSPLUS:
+ case OP_CRPOSQUERY:
fprintf(f, "%s", priv_OP_names[*ccode]);
extra += priv_OP_lengths[*ccode];
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
min = GET2(ccode,1);
max = GET2(ccode,1 + IMM2_SIZE);
if (max == 0) fprintf(f, "{%u,}", min);
else fprintf(f, "{%u,%u}", min, max);
if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
+ else if (*ccode == OP_CRPOSRANGE) fprintf(f, "+");
extra += priv_OP_lengths[*ccode];
break;
diff --git a/pcre_study.c b/pcre_study.c
index e180caf..7e53bdb 100644
--- a/pcre_study.c
+++ b/pcre_study.c
@@ -342,6 +342,7 @@ for (;;)
{
case OP_CRPLUS:
case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
branchlength++;
/* Fall through */
@@ -349,11 +350,14 @@ for (;;)
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
cc++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
branchlength += GET2(cc,1);
cc += 1 + 2 * IMM2_SIZE;
break;
@@ -436,18 +440,22 @@ for (;;)
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
min = 0;
cc++;
break;
case OP_CRPLUS:
case OP_CRMINPLUS:
+ case OP_CRPOSPLUS:
min = 1;
cc++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
min = GET2(cc, 1);
cc += 1 + 2 * IMM2_SIZE;
break;
@@ -1305,11 +1313,14 @@ do
case OP_CRMINSTAR:
case OP_CRQUERY:
case OP_CRMINQUERY:
+ case OP_CRPOSSTAR:
+ case OP_CRPOSQUERY:
tcode++;
break;
case OP_CRRANGE:
case OP_CRMINRANGE:
+ case OP_CRPOSRANGE:
if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
else try_next = FALSE;
break;
diff --git a/testdata/saved16BE-1 b/testdata/saved16BE-1
index 1bd9fa5..e6edddc 100644
--- a/testdata/saved16BE-1
+++ b/testdata/saved16BE-1
Binary files differ
diff --git a/testdata/saved16BE-2 b/testdata/saved16BE-2
index 063d6bc..c91ce37 100644
--- a/testdata/saved16BE-2
+++ b/testdata/saved16BE-2
Binary files differ
diff --git a/testdata/saved16LE-1 b/testdata/saved16LE-1
index 65f9d1c..5035ec0 100644
--- a/testdata/saved16LE-1
+++ b/testdata/saved16LE-1
Binary files differ
diff --git a/testdata/saved16LE-2 b/testdata/saved16LE-2
index b74d9a0..656c058 100644
--- a/testdata/saved16LE-2
+++ b/testdata/saved16LE-2
Binary files differ
diff --git a/testdata/saved32BE-1 b/testdata/saved32BE-1
index 2573d72..b4c2ffe 100644
--- a/testdata/saved32BE-1
+++ b/testdata/saved32BE-1
Binary files differ
diff --git a/testdata/saved32BE-2 b/testdata/saved32BE-2
index ae6c18f..79bb5e8 100644
--- a/testdata/saved32BE-2
+++ b/testdata/saved32BE-2
Binary files differ
diff --git a/testdata/saved32LE-1 b/testdata/saved32LE-1
index 7f6bddb..49392b8 100644
--- a/testdata/saved32LE-1
+++ b/testdata/saved32LE-1
Binary files differ
diff --git a/testdata/saved32LE-2 b/testdata/saved32LE-2
index d260260..5f64af9 100644
--- a/testdata/saved32LE-2
+++ b/testdata/saved32LE-2
Binary files differ
diff --git a/testdata/testinput2 b/testdata/testinput2
index 3604a8e..bf3d926 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -3898,6 +3898,36 @@ backtracking verbs. --/
/a+(?:bb)?a#a+(?:|||)#a+(?:|b)a#a+(?:|||)?a/BZ
+/[ab]*/BZ
+ aaaa
+
+/[ab]*?/BZ
+ aaaa
+
+/[ab]?/BZ
+ aaaa
+
+/[ab]??/BZ
+ aaaa
+
+/[ab]+/BZ
+ aaaa
+
+/[ab]+?/BZ
+ aaaa
+
+/[ab]{2,3}/BZ
+ aaaa
+
+/[ab]{2,3}?/BZ
+ aaaa
+
+/[ab]{2,}/BZ
+ aaaa
+
+/[ab]{2,}?/BZ
+ aaaa
+
/-- End of special auto-possessive tests --/
/^A\o{1239}B/
diff --git a/testdata/testinput8 b/testdata/testinput8
index fe836d4..1a74eb7 100644
--- a/testdata/testinput8
+++ b/testdata/testinput8
@@ -4803,4 +4803,36 @@
/abcd/
abcd\O0
+/-- These tests show up auto-possessification --/
+
+/[ab]*/
+ aaaa
+
+/[ab]*?/
+ aaaa
+
+/[ab]?/
+ aaaa
+
+/[ab]??/
+ aaaa
+
+/[ab]+/
+ aaaa
+
+/[ab]+?/
+ aaaa
+
+/[ab]{2,3}/
+ aaaa
+
+/[ab]{2,3}?/
+ aaaa
+
+/[ab]{2,}/
+ aaaa
+
+/[ab]{2,}?/
+ aaaa
+
/-- End of testinput8 --/
diff --git a/testdata/testoutput14 b/testdata/testoutput14
index 1a94420..8ef235e 100644
--- a/testdata/testoutput14
+++ b/testdata/testoutput14
@@ -432,7 +432,7 @@ Starting byte set: \x0a \x0b \x0c \x0d \x85
/[\h]+/BZ
------------------------------------------------------------------
Bra
- [\x09 \xa0]+
+ [\x09 \xa0]++
Ket
End
------------------------------------------------------------------
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 7a070dc..690226e 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -4591,7 +4591,7 @@ No need char
------------------------------------------------------------------
Bra
Once
- [ab]{1,1}
+ [ab]{1,1}+
Ket
Ket
End
@@ -5304,7 +5304,7 @@ No match
Callout 255 0 21
CBra 1
Callout 255 1 9
- [ab]{1,4}
+ [ab]{1,4}+
Callout 255 10 1
c
Callout 255 11 0
@@ -5317,7 +5317,7 @@ No match
Ket
CBra 1
Callout 255 1 9
- [ab]{1,4}
+ [ab]{1,4}+
Callout 255 10 1
c
Callout 255 11 0
@@ -5330,7 +5330,7 @@ No match
Ket
CBra 1
Callout 255 1 9
- [ab]{1,4}
+ [ab]{1,4}+
Callout 255 10 1
c
Callout 255 11 0
@@ -5343,7 +5343,7 @@ No match
Ket
CBra 1
Callout 255 1 9
- [ab]{1,4}
+ [ab]{1,4}+
Callout 255 10 1
c
Callout 255 11 0
@@ -5357,7 +5357,7 @@ No match
Braminzero
CBra 1
Callout 255 1 9
- [ab]{1,4}
+ [ab]{1,4}+
Callout 255 10 1
c
Callout 255 11 0
@@ -7644,7 +7644,7 @@ No match
------------------------------------------------------------------
Bra
^
- [a-z]+
+ [a-z]++
Ket
End
------------------------------------------------------------------
@@ -12757,7 +12757,7 @@ No set of starting bytes
/[bcd]*a/BZ
------------------------------------------------------------------
Bra
- [b-d]*
+ [b-d]*+
a
Ket
End
@@ -13647,6 +13647,106 @@ No set of starting bytes
End
------------------------------------------------------------------
+/[ab]*/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]*+
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0: aaaa
+
+/[ab]*?/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]*?
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0:
+
+/[ab]?/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]?+
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0: a
+
+/[ab]??/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]??
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0:
+
+/[ab]+/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]++
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0: aaaa
+
+/[ab]+?/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]+?
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0: a
+
+/[ab]{2,3}/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]{2,3}+
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0: aaa
+
+/[ab]{2,3}?/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]{2,3}?
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0: aa
+
+/[ab]{2,}/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]{2,}+
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0: aaaa
+
+/[ab]{2,}?/BZ
+------------------------------------------------------------------
+ Bra
+ [ab]{2,}?
+ Ket
+ End
+------------------------------------------------------------------
+ aaaa
+ 0: aa
+
/-- End of special auto-possessive tests --/
/^A\o{1239}B/
diff --git a/testdata/testoutput8 b/testdata/testoutput8
index 5ca6b45..9cb06fb 100644
--- a/testdata/testoutput8
+++ b/testdata/testoutput8
@@ -1549,18 +1549,6 @@ No match
/^[.^$|()*+?{,}]+/
.^\$(*+)|{?,?}
0: .^$(*+)|{?,?}
- 1: .^$(*+)|{?,?
- 2: .^$(*+)|{?,
- 3: .^$(*+)|{?
- 4: .^$(*+)|{
- 5: .^$(*+)|
- 6: .^$(*+)
- 7: .^$(*+
- 8: .^$(*
- 9: .^$(
-10: .^$
-11: .^
-12: .
/^a*\w/
z
@@ -2790,8 +2778,6 @@ No match
/[-az]+/
az-
0: az-
- 1: az
- 2: a
*** Failers
0: a
b
@@ -2800,8 +2786,6 @@ No match
/[az-]+/
za-
0: za-
- 1: za
- 2: z
*** Failers
0: a
b
@@ -2810,8 +2794,6 @@ No match
/[a\-z]+/
a-z
0: a-z
- 1: a-
- 2: a
*** Failers
0: a
b
@@ -2820,20 +2802,10 @@ No match
/[a-z]+/
abcdxyz
0: abcdxyz
- 1: abcdxy
- 2: abcdx
- 3: abcd
- 4: abc
- 5: ab
- 6: a
/[\d-]+/
12-34
0: 12-34
- 1: 12-3
- 2: 12-
- 3: 12
- 4: 1
*** Failers
No match
aaa
@@ -2842,11 +2814,6 @@ No match
/[\d-z]+/
12-34z
0: 12-34z
- 1: 12-34
- 2: 12-3
- 3: 12-
- 4: 12
- 5: 1
*** Failers
No match
aaa
@@ -4782,9 +4749,6 @@ No match
/[^ab]*/
cde
0: cde
- 1: cd
- 2: c
- 3:
/abc/
*** Failers
@@ -4884,10 +4848,6 @@ No match
/[a-zA-Z_][a-zA-Z0-9_]*/
alpha
0: alpha
- 1: alph
- 2: alp
- 3: al
- 4: a
/^a(bc+|b[eh])g|.h$/
abh
@@ -5244,9 +5204,6 @@ No match
/[^ab]*/i
CDE
0: CDE
- 1: CD
- 2: C
- 3:
/abc/i
@@ -5337,10 +5294,6 @@ No match
/[a-zA-Z_][a-zA-Z0-9_]*/i
ALPHA
0: ALPHA
- 1: ALPH
- 2: ALP
- 3: AL
- 4: A
/^a(bc+|b[eh])g|.h$/i
ABH
@@ -5897,17 +5850,14 @@ No match
/([[:]+)/
a:[b]:
0: :[
- 1: :
/([[=]+)/
a=[b]=
0: =[
- 1: =
/([[.]+)/
a.[b].
0: .[
- 1: .
/((?>a+)b)/
aaab
@@ -6074,25 +6024,14 @@ No match
/[[:space:]]+/
> \x09\x0a\x0c\x0d\x0b<
0: \x09\x0a\x0c\x0d\x0b
- 1: \x09\x0a\x0c\x0d
- 2: \x09\x0a\x0c
- 3: \x09\x0a
- 4: \x09
- 5:
/[[:blank:]]+/
> \x09\x0a\x0c\x0d\x0b<
0: \x09
- 1:
/[\s]+/
> \x09\x0a\x0c\x0d\x0b<
0: \x09\x0a\x0c\x0d\x0b
- 1: \x09\x0a\x0c\x0d
- 2: \x09\x0a\x0c
- 3: \x09\x0a
- 4: \x09
- 5:
/\s+/
> \x09\x0a\x0c\x0d\x0b<
@@ -6403,8 +6342,6 @@ Partial match: 123
/Content-Type\x3A[^\r\n]{6,}/
Content-Type:xxxxxyyy
0: Content-Type:xxxxxyyy
- 1: Content-Type:xxxxxyy
- 2: Content-Type:xxxxxy
/Content-Type\x3A[^\r\n]{6,}z/
Content-Type:xxxxxyyyz
@@ -7354,8 +7291,6 @@ Partial match: abc1
/abc[de]*/
xxxxabcde\P
0: abcde
- 1: abcd
- 2: abc
xxxxabcde\P\P
Partial match: abcde
@@ -7798,4 +7733,57 @@ Error -30 (invalid data in workspace for DFA restart)
abcd\O0
Matched, but offsets vector is too small to show all matches
+/-- These tests show up auto-possessification --/
+
+/[ab]*/
+ aaaa
+ 0: aaaa
+
+/[ab]*?/
+ aaaa
+ 0: aaaa
+ 1: aaa
+ 2: aa
+ 3: a
+ 4:
+
+/[ab]?/
+ aaaa
+ 0: a
+
+/[ab]??/
+ aaaa
+ 0: a
+ 1:
+
+/[ab]+/
+ aaaa
+ 0: aaaa
+
+/[ab]+?/
+ aaaa
+ 0: aaaa
+ 1: aaa
+ 2: aa
+ 3: a
+
+/[ab]{2,3}/
+ aaaa
+ 0: aaa
+
+/[ab]{2,3}?/
+ aaaa
+ 0: aaa
+ 1: aa
+
+/[ab]{2,}/
+ aaaa
+ 0: aaaa
+
+/[ab]{2,}?/
+ aaaa
+ 0: aaaa
+ 1: aaa
+ 2: aa
+
/-- End of testinput8 --/
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 1b38337..797d9ac 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -1227,8 +1227,6 @@ Partial match: abc1
/abc[de]*/8
xxxxabcde\P
0: abcde
- 1: abcd
- 2: abc
xxxxabcde\P\P
Partial match: abcde