diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2009-10-17 19:55:02 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2009-10-17 19:55:02 +0000 |
commit | 1da028459167ef408e659b9fe91eb70f3b79e395 (patch) | |
tree | 2fbe158f25c2f1fc68cb1532b6dd7cee33a7ec94 | |
parent | f66c8de115b662c90e2a0af9a4357f69df2b3106 (diff) | |
download | pcre-1da028459167ef408e659b9fe91eb70f3b79e395.tar.gz |
Fix PCRE_PARTIAL_HARD for patterns that end optionally, e.g. abc*
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@462 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 3 | ||||
-rw-r--r-- | configure.ac | 4 | ||||
-rw-r--r-- | pcre_dfa_exec.c | 91 | ||||
-rw-r--r-- | pcre_exec.c | 270 | ||||
-rw-r--r-- | pcre_internal.h | 8 | ||||
-rw-r--r-- | testdata/testinput2 | 22 | ||||
-rw-r--r-- | testdata/testinput5 | 22 | ||||
-rw-r--r-- | testdata/testinput7 | 18 | ||||
-rw-r--r-- | testdata/testinput8 | 18 | ||||
-rw-r--r-- | testdata/testoutput2 | 35 | ||||
-rw-r--r-- | testdata/testoutput5 | 35 | ||||
-rw-r--r-- | testdata/testoutput7 | 34 | ||||
-rw-r--r-- | testdata/testoutput8 | 34 |
13 files changed, 533 insertions, 61 deletions
@@ -58,8 +58,7 @@ Version 8.00 05-Oct-09 10. Partial matching has been split into two forms: PCRE_PARTIAL_SOFT, which is synonymous with PCRE_PARTIAL, for backwards compatibility, and PCRE_PARTIAL_HARD, which causes a partial match to supersede a full match, - and may be more useful for multi-segment matching, especially with - pcre_exec(). + and may be more useful for multi-segment matching. 11. Partial matching with pcre_exec() is now more intuitive. A partial match used to be given if ever the end of the subject was reached; now it is diff --git a/configure.ac b/configure.ac index aed0d4c..2254be6 100644 --- a/configure.ac +++ b/configure.ac @@ -8,8 +8,8 @@ dnl empty. m4_define(pcre_major, [8]) m4_define(pcre_minor, [00]) -m4_define(pcre_prerelease, [-RC1]) -m4_define(pcre_date, [2009-10-05]) +m4_define(pcre_prerelease, [-RC2]) +m4_define(pcre_date, [2009-10-17]) # Libtool shared library interface versions (current:revision:age) m4_define(libpcre_version, [0:1:0]) diff --git a/pcre_dfa_exec.c b/pcre_dfa_exec.c index 93a885e..458bb4c 100644 --- a/pcre_dfa_exec.c +++ b/pcre_dfa_exec.c @@ -109,8 +109,9 @@ never stored, so we push them well clear of the normal opcodes. */ character that is to be tested in some way. This makes is possible to centralize the loading of these characters. In the case of Type * etc, the "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a -small value. ***NOTE*** If the start of this table is modified, the two tables -that follow must also be modified. */ +small value. Non-zero values in the table are the offsets from the opcode where +the character is to be found. ***NOTE*** If the start of this table is +modified, the three tables that follow must also be modified. 
*/ static const uschar coptable[] = { 0, /* End */ @@ -160,7 +161,64 @@ static const uschar coptable[] = { 0, /* DEF */ 0, 0, /* BRAZERO, BRAMINZERO */ 0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */ - 0, 0, 0 /* FAIL, ACCEPT, SKIPZERO */ + 0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */ +}; + +/* This table identifies those opcodes that inspect a character. It is used to +remember the fact that a character could have been inspected when the end of +the subject is reached, in order to support PCRE_PARTIAL_HARD behaviour. +***NOTE*** If the start of this table is modified, the two tables that follow +must also be modified. */ + +static const uschar poptable[] = { + 0, /* End */ + 0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ + 1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ + 1, 1, 1, /* Any, AllAny, Anybyte */ + 1, 1, 1, /* NOTPROP, PROP, EXTUNI */ + 1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ + 0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ + 1, /* Char */ + 1, /* Charnc */ + 1, /* not */ + /* Positive single-char repeats */ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ + 1, 1, 1, /* upto, minupto, exact */ + 1, 1, 1, 1, /* *+, ++, ?+, upto+ */ + /* Negative single-char repeats - only for chars < 256 */ + 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ + 1, 1, 1, /* NOT upto, minupto, exact */ + 1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */ + /* Positive type repeats */ + 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ + 1, 1, 1, /* Type upto, minupto, exact */ + 1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */ + /* Character class & ref repeats */ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? 
*/ + 1, 1, /* CRRANGE, CRMINRANGE */ + 1, /* CLASS */ + 1, /* NCLASS */ + 1, /* XCLASS - variable length */ + 0, /* REF */ + 0, /* RECURSE */ + 0, /* CALLOUT */ + 0, /* Alt */ + 0, /* Ket */ + 0, /* KetRmax */ + 0, /* KetRmin */ + 0, /* Assert */ + 0, /* Assert not */ + 0, /* Assert behind */ + 0, /* Assert behind not */ + 0, /* Reverse */ + 0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */ + 0, 0, 0, /* SBRA, SCBRA, SCOND */ + 0, /* CREF */ + 0, /* RREF */ + 0, /* DEF */ + 0, 0, /* BRAZERO, BRAMINZERO */ + 0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */ + 0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */ }; /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, @@ -489,6 +547,7 @@ for (;;) unsigned int c, d; int forced_fail = 0; int reached_end = 0; + BOOL could_continue = FALSE; /* Make the new state list into the active state list and empty the new state list. */ @@ -596,6 +655,12 @@ for (;;) code = start_code + state_offset; codevalue = *code; + + /* If this opcode inspects a character, but we are at the end of the + subject, remember the fact so that we can support PCRE_PARTIAL_HARD. */ + + if (clen == 0 && poptable[codevalue] != 0) + could_continue = TRUE; /* If this opcode is followed by an inline character, load it. It is tempting to test for the presence of a subject character here, but that @@ -2522,16 +2587,24 @@ for (;;) /* We have finished the processing at the current subject character. If no new states have been set for the next character, we have found all the matches that we are going to find. If we are at the top level and partial - matching has been requested, check for appropriate conditions. The "forced_ - fail" variable counts the number of (*F) encountered for the character. If it - is equal to the original active_count (saved in workspace[1]) it means that - (*F) was found on every active state. In this case we don't want to give a - partial match. */ + matching has been requested, check for appropriate conditions. 
+ + The "forced_fail" variable counts the number of (*F) encountered for the + character. If it is equal to the original active_count (saved in + workspace[1]) it means that (*F) was found on every active state. In this + case we don't want to give a partial match. + + The "reached_end" variable counts the number of threads that have reached the + end of the pattern. The "could_continue" variable is true if a thread could + have continued but for the fact that the end of the subject was reached. */ if (new_count <= 0) { if (rlevel == 1 && /* Top level, and */ - reached_end != workspace[1] && /* Not all reached end */ + ( /* either... */ + reached_end != workspace[1] || /* Not all reached end */ + could_continue /* or some could go on */ + ) && /* and... */ forced_fail != workspace[1] && /* Not all forced fail & */ ( /* either... */ (md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */ diff --git a/pcre_exec.c b/pcre_exec.c index db1e926..f930095 100644 --- a/pcre_exec.c +++ b/pcre_exec.c @@ -415,7 +415,7 @@ the subject. 
*/ } #define SCHECK_PARTIAL()\ - if (md->partial && eptr > mstart)\ + if (md->partial != 0 && eptr > mstart)\ {\ md->hitend = TRUE;\ if (md->partial > 1) RRETURN(PCRE_ERROR_PARTIAL);\ @@ -2146,7 +2146,11 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (!match_ref(offset, eptr, length, md, ims)) break; + if (!match_ref(offset, eptr, length, md, ims)) + { + CHECK_PARTIAL(); + break; + } eptr += length; } while (eptr >= pp) @@ -2315,7 +2319,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c > 255) { @@ -2341,7 +2349,11 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if ((data[c/8] & (1 << (c&7))) == 0) break; eptr++; @@ -2446,7 +2458,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLENTEST(c, eptr, len); if (!_pcre_xclass(c, data)) break; eptr += len; @@ -2685,7 +2701,11 @@ for (;;) eptr <= md->end_subject - oclength && memcmp(eptr, occhars, oclength) == 0) eptr += oclength; #endif /* SUPPORT_UCP */ - else break; + else + { + CHECK_PARTIAL(); + break; + } } if (possessive) continue; @@ -2763,7 +2783,12 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (fc != md->lcc[*eptr]) break; eptr++; } @@ -2817,7 +2842,12 @@ for (;;) pp = eptr; for (i = min; i < max; i++) { - if (eptr >= md->end_subject || fc != *eptr) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (fc != *eptr) break; eptr++; } if (possessive) continue; @@ -3029,7 +3059,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) 
break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(d, eptr, len); if (d < 256) d = md->lcc[d]; if (fc == d) break; @@ -3050,7 +3084,12 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (fc == md->lcc[*eptr]) break; eptr++; } if (possessive) continue; @@ -3159,7 +3198,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(d, eptr, len); if (fc == d) break; eptr += len; @@ -3179,7 +3222,12 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject || fc == *eptr) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (fc == *eptr) break; eptr++; } if (possessive) continue; @@ -4335,7 +4383,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (prop_fail_result) break; eptr+= len; @@ -4346,7 +4398,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == ucp_Lu || @@ -4361,7 +4417,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); prop_category = UCD_CATEGORY(c); if ((prop_category == prop_value) == prop_fail_result) @@ -4374,7 +4434,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); prop_chartype = UCD_CHARTYPE(c); if ((prop_chartype == prop_value) == 
prop_fail_result) @@ -4387,7 +4451,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); prop_script = UCD_SCRIPT(c); if ((prop_script == prop_value) == prop_fail_result) @@ -4416,7 +4484,11 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARINCTEST(c, eptr); prop_category = UCD_CATEGORY(c); if (prop_category == ucp_M) break; @@ -4436,6 +4508,7 @@ for (;;) /* eptr is now past the end of the maximum run */ if (possessive) continue; + for(;;) { RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM45); @@ -4471,7 +4544,12 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(eptr)) break; eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -4483,7 +4561,12 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(eptr)) break; eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -4495,7 +4578,11 @@ for (;;) { for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } eptr++; while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; } @@ -4508,15 +4595,22 @@ for (;;) case OP_ANYBYTE: c = max - min; if (c > (unsigned int)(md->end_subject - eptr)) - c = md->end_subject - eptr; - eptr += c; + { + eptr = md->end_subject; + SCHECK_PARTIAL(); + } + else eptr += c; break; case OP_ANYNL: for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, 
eptr, len); if (c == 0x000d) { @@ -4541,7 +4635,11 @@ for (;;) { BOOL gotspace; int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); switch(c) { @@ -4579,7 +4677,11 @@ for (;;) { BOOL gotspace; int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); switch(c) { @@ -4603,7 +4705,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break; eptr+= len; @@ -4614,7 +4720,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break; eptr+= len; @@ -4625,7 +4735,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break; eptr+= len; @@ -4636,7 +4750,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break; eptr+= len; @@ -4647,7 +4765,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } GETCHARLEN(c, eptr, len); if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break; eptr+= len; @@ -4658,7 +4780,11 @@ for (;;) for (i = min; i < max; i++) { int len = 1; - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + 
break; + } GETCHARLEN(c, eptr, len); if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break; eptr+= len; @@ -4690,7 +4816,12 @@ for (;;) case OP_ANY: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } + if (IS_NEWLINE(eptr)) break; eptr++; } break; @@ -4699,14 +4830,21 @@ for (;;) case OP_ANYBYTE: c = max - min; if (c > (unsigned int)(md->end_subject - eptr)) - c = md->end_subject - eptr; - eptr += c; + { + eptr = md->end_subject; + SCHECK_PARTIAL(); + } + else eptr += c; break; case OP_ANYNL: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c == 0x000d) { @@ -4727,7 +4865,11 @@ for (;;) case OP_NOT_HSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c == 0x09 || c == 0x20 || c == 0xa0) break; eptr++; @@ -4737,7 +4879,11 @@ for (;;) case OP_HSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c != 0x09 && c != 0x20 && c != 0xa0) break; eptr++; @@ -4747,7 +4893,11 @@ for (;;) case OP_NOT_VSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c == 0x0a || c == 0x0b || c == 0x0c || c == 0x0d || c == 0x85) break; @@ -4758,7 +4908,11 @@ for (;;) case OP_VSPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject) break; + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); + break; + } c = *eptr; if (c != 0x0a && c != 0x0b && c != 0x0c && c != 0x0d && c != 0x85) break; @@ -4769,8 +4923,12 @@ for (;;) case OP_NOT_DIGIT: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) + if (eptr >= 
md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_digit) != 0) break; eptr++; } break; @@ -4778,8 +4936,12 @@ for (;;) case OP_DIGIT: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_digit) == 0) break; eptr++; } break; @@ -4787,8 +4949,12 @@ for (;;) case OP_NOT_WHITESPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_space) != 0) break; eptr++; } break; @@ -4796,8 +4962,12 @@ for (;;) case OP_WHITESPACE: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_space) == 0) break; eptr++; } break; @@ -4805,8 +4975,12 @@ for (;;) case OP_NOT_WORDCHAR: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_word) != 0) break; eptr++; } break; @@ -4814,8 +4988,12 @@ for (;;) case OP_WORDCHAR: for (i = min; i < max; i++) { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) + if (eptr >= md->end_subject) + { + SCHECK_PARTIAL(); break; + } + if ((md->ctypes[*eptr] & ctype_word) == 0) break; eptr++; } break; diff --git a/pcre_internal.h b/pcre_internal.h index a892af9..de09614 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -1210,8 +1210,8 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, OP_EOD must correspond in order to the list of escapes immediately above. *** NOTE NOTE NOTE *** Whenever this list is updated, the two macro definitions -that follow must also be updated to match. 
There is also a table called -"coptable" in pcre_dfa_exec.c that must be updated. */ +that follow must also be updated to match. There are also tables called +"coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */ enum { OP_END, /* 0 End of pattern */ @@ -1376,6 +1376,10 @@ enum { OP_SKIPZERO /* 114 */ }; +/* *** NOTE NOTE NOTE *** Whenever the list above is updated, the two macro +definitions that follow must also be updated to match. There are also tables +called "coptable" and "poptable" in pcre_dfa_exec.c that must be updated. */ + /* This macro defines textual names for all the opcodes. These are used only for debugging. The macro is referenced only in pcre_printint.c. */ diff --git a/testdata/testinput2 b/testdata/testinput2 index 7f887c8..850242e 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -3125,4 +3125,26 @@ a random value. /Ix ** Failers abcdde +/abcd*/ + xxxxabcd\P + xxxxabcd\P\P + +/abcd*/i + xxxxabcd\P + xxxxabcd\P\P + XXXXABCD\P + XXXXABCD\P\P + +/abc\d*/ + xxxxabc1\P + xxxxabc1\P\P + +/(a)bc\1*/ + xxxxabca\P + xxxxabca\P\P + +/abc[de]*/ + xxxxabcde\P + xxxxabcde\P\P + /-- End of testinput2 --/ diff --git a/testdata/testinput5 b/testdata/testinput5 index c1a21ba..82818d7 100644 --- a/testdata/testinput5 +++ b/testdata/testinput5 @@ -720,4 +720,26 @@ can't tell the difference.) 
--/ the cat\P the cat\P\P +/abcd*/8 + xxxxabcd\P + xxxxabcd\P\P + +/abcd*/i8 + xxxxabcd\P + xxxxabcd\P\P + XXXXABCD\P + XXXXABCD\P\P + +/abc\d*/8 + xxxxabc1\P + xxxxabc1\P\P + +/(a)bc\1*/8 + xxxxabca\P + xxxxabca\P\P + +/abc[de]*/8 + xxxxabcde\P + xxxxabcde\P\P + /-- End of testinput5 --/ diff --git a/testdata/testinput7 b/testdata/testinput7 index f921835..dbc66bb 100644 --- a/testdata/testinput7 +++ b/testdata/testinput7 @@ -4507,4 +4507,22 @@ thejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd \Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd +/abcd*/ + xxxxabcd\P + xxxxabcd\P\P + +/abcd*/i + xxxxabcd\P + xxxxabcd\P\P + XXXXABCD\P + XXXXABCD\P\P + +/abc\d*/ + xxxxabc1\P + xxxxabc1\P\P + +/abc[de]*/ + xxxxabcde\P + xxxxabcde\P\P + /-- End of testinput7 --/ diff --git a/testdata/testinput8 b/testdata/testinput8 index e879e81..1c6f684 100644 --- a/testdata/testinput8 +++ b/testdata/testinput8 @@ -667,4 +667,22 @@ /X/8f<any> A\x{1ec5}ABCXYZ +/abcd*/8 + xxxxabcd\P + xxxxabcd\P\P + +/abcd*/i8 + xxxxabcd\P + xxxxabcd\P\P + XXXXABCD\P + XXXXABCD\P\P + +/abc\d*/8 + xxxxabc1\P + xxxxabc1\P\P + +/abc[de]*/8 + xxxxabcde\P + xxxxabcde\P\P + /-- End of testinput8 --/ diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 0d5b61b..646478e 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -10372,4 +10372,39 @@ No match abcdde No match +/abcd*/ + xxxxabcd\P + 0: abcd + xxxxabcd\P\P +Partial match: abcd + +/abcd*/i + xxxxabcd\P + 0: abcd + xxxxabcd\P\P +Partial match: abcd + XXXXABCD\P + 0: ABCD + XXXXABCD\P\P +Partial match: ABCD + +/abc\d*/ + xxxxabc1\P + 0: abc1 + xxxxabc1\P\P +Partial match: abc1 + +/(a)bc\1*/ + xxxxabca\P + 0: abca + 1: a + xxxxabca\P\P +Partial match: abca + +/abc[de]*/ + xxxxabcde\P + 0: abcde + xxxxabcde\P\P +Partial match: abcde + /-- End of testinput2 --/ diff --git a/testdata/testoutput5 b/testdata/testoutput5 index 1aaa5be..f5de747 100644 --- a/testdata/testoutput5 +++ b/testdata/testoutput5 @@ -2037,4 +2037,39 @@ Partial match: 
\x{123}X\x{123}\x{123}\x{123}\x{123} the cat\P\P Partial match: the cat +/abcd*/8 + xxxxabcd\P + 0: abcd + xxxxabcd\P\P +Partial match: abcd + +/abcd*/i8 + xxxxabcd\P + 0: abcd + xxxxabcd\P\P +Partial match: abcd + XXXXABCD\P + 0: ABCD + XXXXABCD\P\P +Partial match: ABCD + +/abc\d*/8 + xxxxabc1\P + 0: abc1 + xxxxabc1\P\P +Partial match: abc1 + +/(a)bc\1*/8 + xxxxabca\P + 0: abca + 1: a + xxxxabca\P\P +Partial match: abca + +/abc[de]*/8 + xxxxabcde\P + 0: abcde + xxxxabcde\P\P +Partial match: abcde + /-- End of testinput5 --/ diff --git a/testdata/testoutput7 b/testdata/testoutput7 index 8cac766..524450e 100644 --- a/testdata/testoutput7 +++ b/testdata/testoutput7 @@ -7514,4 +7514,38 @@ No match \Ythejk;adlfj aenjl;fda asdfasd ehj;kjxyasiupd No match +/abcd*/ + xxxxabcd\P + 0: abcd + 1: abc + xxxxabcd\P\P +Partial match: abcd + +/abcd*/i + xxxxabcd\P + 0: abcd + 1: abc + xxxxabcd\P\P +Partial match: abcd + XXXXABCD\P + 0: ABCD + 1: ABC + XXXXABCD\P\P +Partial match: ABCD + +/abc\d*/ + xxxxabc1\P + 0: abc1 + 1: abc + xxxxabc1\P\P +Partial match: abc1 + +/abc[de]*/ + xxxxabcde\P + 0: abcde + 1: abcd + 2: abc + xxxxabcde\P\P +Partial match: abcde + /-- End of testinput7 --/ diff --git a/testdata/testoutput8 b/testdata/testoutput8 index d991095..0cc87d7 100644 --- a/testdata/testoutput8 +++ b/testdata/testoutput8 @@ -1286,4 +1286,38 @@ No match A\x{1ec5}ABCXYZ 0: X +/abcd*/8 + xxxxabcd\P + 0: abcd + 1: abc + xxxxabcd\P\P +Partial match: abcd + +/abcd*/i8 + xxxxabcd\P + 0: abcd + 1: abc + xxxxabcd\P\P +Partial match: abcd + XXXXABCD\P + 0: ABCD + 1: ABC + XXXXABCD\P\P +Partial match: ABCD + +/abc\d*/8 + xxxxabc1\P + 0: abc1 + 1: abc + xxxxabc1\P\P +Partial match: abc1 + +/abc[de]*/8 + xxxxabcde\P + 0: abcde + 1: abcd + 2: abc + xxxxabcde\P\P +Partial match: abcde + /-- End of testinput8 --/ |