diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-15 17:35:10 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2011-11-15 17:35:10 +0000 |
commit | 88d27ef1f5a58734c2f576c64bddd92e912e519a (patch) | |
tree | c8f240d8a1a05146857647c9c0ac7a0d7f51e866 | |
parent | f021e15fd4f782eed4d409c68d8cbffc5200fb3b (diff) | |
download | pcre-88d27ef1f5a58734c2f576c64bddd92e912e519a.tar.gz |
Fixed several items that were being incorrectly rejected as "not fixed length"
in lookbehinds.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@747 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 10 | ||||
-rw-r--r-- | pcre_compile.c | 154 | ||||
-rw-r--r-- | pcre_internal.h | 2 | ||||
-rw-r--r-- | pcreposix.c | 2 | ||||
-rw-r--r-- | testdata/testinput1 | 24 | ||||
-rw-r--r-- | testdata/testinput11 | 27 | ||||
-rw-r--r-- | testdata/testoutput1 | 36 | ||||
-rw-r--r-- | testdata/testoutput11 | 41 |
8 files changed, 273 insertions, 23 deletions
@@ -23,6 +23,16 @@ Version 8.21 6. Lookbehinds such as (?<=a{2}b) that contained a fixed repetition were erroneously being rejected as "not fixed length" if PCRE_CASELESS was set. This bug was probably introduced by change 9 of 8.13. + +7. While fixing 6 above, I noticed that a number of other items were being + incorrectly rejected as "not fixed length". This arose partly because newer + opcodes had not been added to the fixed-length checking code. I have (a) + corrected the bug and added tests for these items, and (b) arranged for an + error to occur if an unknown opcode is encountered while checking for fixed + length instead of just assuming "not fixed length". The items that were + rejected were: (*ACCEPT), (*COMMIT), (*FAIL), (*MARK), (*PRUNE), (*SKIP), + (*THEN), \h, \H, \v, \V, and single character negative classes with fixed + repetitions, e.g. [^a]{3}, with and without PCRE_CASELESS. Version 8.20 21-Oct-2011 diff --git a/pcre_compile.c b/pcre_compile.c index 0f875d0..2a49b3b 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -410,6 +410,8 @@ static const char error_texts[] = "this version of PCRE is not compiled with PCRE_UCP support\0" "\\c must be followed by an ASCII character\0" "\\k is not followed by a braced, angle-bracketed, or quoted name\0" + /* 70 */ + "internal error: unknown opcode in find_fixedlength()\0" ; /* Table to identify digits and hex digits. This is used when compiling @@ -1528,6 +1530,7 @@ Returns: the fixed length, or -1 if there is no fixed length, or -2 if \C was encountered or -3 if an OP_RECURSE item was encountered and atend is FALSE + or -4 if an unknown opcode was encountered (internal error) */ static int @@ -1551,8 +1554,7 @@ for (;;) /* We only need to continue for OP_CBRA (normal capturing bracket) and OP_BRA (normal non-capturing bracket) because the other variants of these opcodes are all concerned with unlimited repeated groups, which of course - are not of fixed length. They will cause a -1 response from the default - case of this switch. */ + are not of fixed length. */ case OP_CBRA: case OP_BRA: @@ -1566,15 +1568,17 @@ for (;;) cc += 1 + LINK_SIZE; break; - /* Reached end of a branch; if it's a ket it is the end of a nested - call. If it's ALT it is an alternation in a nested call. If it is - END it's the end of the outer call. All can be handled by the same code. - Note that we must not include the OP_KETRxxx opcodes here, because they - all imply an unlimited repeat. */ + /* Reached end of a branch; if it's a ket it is the end of a nested call. + If it's ALT it is an alternation in a nested call. An ACCEPT is effectively + an ALT. If it is END it's the end of the outer call. All can be handled by + the same code. Note that we must not include the OP_KETRxxx opcodes here, + because they all imply an unlimited repeat. */ case OP_ALT: case OP_KET: case OP_END: + case OP_ACCEPT: + case OP_ASSERT_ACCEPT: if (length < 0) length = branchlength; else if (length != branchlength) return -1; if (*cc != OP_ALT) return length; @@ -1608,23 +1612,36 @@ for (;;) /* Skip over things that don't match chars */ - case OP_REVERSE: - case OP_CREF: - case OP_NCREF: - case OP_RREF: - case OP_NRREF: - case OP_DEF: + case OP_MARK: + case OP_PRUNE_ARG: + case OP_SKIP_ARG: + case OP_THEN_ARG: + cc += cc[1] + _pcre_OP_lengths[*cc]; + break; + case OP_CALLOUT: - case OP_SOD: - case OP_SOM: - case OP_SET_SOM: - case OP_EOD: - case OP_EODN: case OP_CIRC: case OP_CIRCM: + case OP_CLOSE: + case OP_COMMIT: + case OP_CREF: + case OP_DEF: case OP_DOLL: case OP_DOLLM: + case OP_EOD: + case OP_EODN: + case OP_FAIL: + case OP_NCREF: + case OP_NRREF: case OP_NOT_WORD_BOUNDARY: + case OP_PRUNE: + case OP_REVERSE: + case OP_RREF: + case OP_SET_SOM: + case OP_SKIP: + case OP_SOD: + case OP_SOM: + case OP_THEN: case OP_WORD_BOUNDARY: cc += _pcre_OP_lengths[*cc]; break; @@ -1646,7 +1663,9 @@ for (;;) need to skip over a multibyte character in UTF8 mode. */ case OP_EXACT: - case OP_EXACTI: + case OP_EXACTI: + case OP_NOTEXACT: + case OP_NOTEXACTI: branchlength += GET2(cc,1); cc += 4; #ifdef SUPPORT_UTF8 @@ -1667,6 +1686,10 @@ for (;;) cc += 2; /* Fall through */ + case OP_HSPACE: + case OP_VSPACE: + case OP_NOT_HSPACE: + case OP_NOT_VSPACE: case OP_NOT_DIGIT: case OP_DIGIT: case OP_NOT_WHITESPACE: @@ -1698,6 +1721,8 @@ for (;;) switch (*cc) { + case OP_CRPLUS: + case OP_CRMINPLUS: case OP_CRSTAR: case OP_CRMINSTAR: case OP_CRQUERY: @@ -1718,8 +1743,91 @@ for (;;) /* Anything else is variable length */ - default: + case OP_ANYNL: + case OP_BRAMINZERO: + case OP_BRAPOS: + case OP_BRAPOSZERO: + case OP_BRAZERO: + case OP_CBRAPOS: + case OP_EXTUNI: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_KETRPOS: + case OP_MINPLUS: + case OP_MINPLUSI: + case OP_MINQUERY: + case OP_MINQUERYI: + case OP_MINSTAR: + case OP_MINSTARI: + case OP_MINUPTO: + case OP_MINUPTOI: + case OP_NOTMINPLUS: + case OP_NOTMINPLUSI: + case OP_NOTMINQUERY: + case OP_NOTMINQUERYI: + case OP_NOTMINSTAR: + case OP_NOTMINSTARI: + case OP_NOTMINUPTO: + case OP_NOTMINUPTOI: + case OP_NOTPLUS: + case OP_NOTPLUSI: + case OP_NOTPOSPLUS: + case OP_NOTPOSPLUSI: + case OP_NOTPOSQUERY: + case OP_NOTPOSQUERYI: + case OP_NOTPOSSTAR: + case OP_NOTPOSSTARI: + case OP_NOTPOSUPTO: + case OP_NOTPOSUPTOI: + case OP_NOTQUERY: + case OP_NOTQUERYI: + case OP_NOTSTAR: + case OP_NOTSTARI: + case OP_NOTUPTO: + case OP_NOTUPTOI: + case OP_PLUS: + case OP_PLUSI: + case OP_POSPLUS: + case OP_POSPLUSI: + case OP_POSQUERY: + case OP_POSQUERYI: + case OP_POSSTAR: + case OP_POSSTARI: + case OP_POSUPTO: + case OP_POSUPTOI: + case OP_QUERY: + case OP_QUERYI: + case OP_REF: + case OP_REFI: + case OP_SBRA: + case OP_SBRAPOS: + case OP_SCBRA: + case OP_SCBRAPOS: + case OP_SCOND: + case OP_SKIPZERO: + case OP_STAR: + case OP_STARI: + case OP_TYPEMINPLUS: + case OP_TYPEMINQUERY: + case OP_TYPEMINSTAR: + case OP_TYPEMINUPTO: + case OP_TYPEPLUS: + case OP_TYPEPOSPLUS: + case OP_TYPEPOSQUERY: + case OP_TYPEPOSSTAR: + case OP_TYPEPOSUPTO: + case OP_TYPEQUERY: + case OP_TYPESTAR: + case OP_TYPEUPTO: + case OP_UPTO: + case OP_UPTOI: return -1; + + /* Catch unrecognized opcodes so that when new ones are added they + are not forgotten, as has happened in the past. */ + + default: + return -4; } } /* Control never gets here */ @@ -6615,7 +6723,8 @@ for (;;) } else if (fixed_length < 0) { - *errorcodeptr = (fixed_length == -2)? ERR36 : ERR25; + *errorcodeptr = (fixed_length == -2)? ERR36 : + (fixed_length == -4)? ERR70: ERR25; *ptrptr = ptr; return FALSE; } @@ -7414,7 +7523,8 @@ if (cd->check_lookbehind) DPRINTF(("fixed length = %d\n", fixed_length)); if (fixed_length < 0) { - errorcode = (fixed_length == -2)? ERR36 : ERR25; + errorcode = (fixed_length == -2)? ERR36 : + (fixed_length == -4)? ERR70 : ERR25; break; } PUT(cc, 1, fixed_length); diff --git a/pcre_internal.h b/pcre_internal.h index faf1b76..2d02e5d 100644 --- a/pcre_internal.h +++ b/pcre_internal.h @@ -1665,7 +1665,7 @@ enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69, - ERRCOUNT }; + ERR70, ERRCOUNT }; /* The real format of the start of the pcre block; the index of names and the code vector run on as long as necessary after the end. We store an explicit diff --git a/pcreposix.c b/pcreposix.c index 2061be0..648254b 100644 --- a/pcreposix.c +++ b/pcreposix.c @@ -153,6 +153,8 @@ static const int eint[] = { REG_INVARG, /* this version of PCRE is not compiled with PCRE_UCP support */ REG_BADPAT, /* \c must be followed by an ASCII character */ REG_BADPAT, /* \k is not followed by a braced, angle-bracketed, or quoted name */ + /* 70 */ + REG_BADPAT, /* internal error: unknown opcode in find_fixedlength() */ }; /* Table of texts corresponding to POSIX error codes */ diff --git a/testdata/testinput1 b/testdata/testinput1 index aa9ce42..b24f900 100644 --- a/testdata/testinput1 +++ b/testdata/testinput1 @@ -4261,4 +4261,28 @@ ** Failers xaabc +/(?<=a\h)c/ + xa c + +/(?<=[^a]{2})b/ + axxbc + aAAbc + ** Failers + xaabc + +/(?<=[^a]{2})b/i + axxbc + ** Failers + aAAbc + xaabc + +/(?<=a\H)c/ + abc + +/(?<=a\V)c/ + abc + +/(?<=a\v)c/ + a\nc + /-- End of testinput1 --/ diff --git a/testdata/testinput11 b/testdata/testinput11 index 5849eb0..a9d1cfd 100644 --- a/testdata/testinput11 +++ b/testdata/testinput11 @@ -773,4 +773,31 @@ name)/K /(?>(a)(*:m))/imsxSK a +/(?<=a(*ACCEPT)b)c/ + xacd + +/(?<=(a(*ACCEPT)b))c/ + xacd + +/(?<=(a(*COMMIT)b))c/ + xabcd + ** Failers + xacd + +/(?<!a(*FAIL)b)c/ + xcd + acd + +/(?<=a(*:N)b)c/K + xabcd + +/(?<=a(*PRUNE)b)c/ + xabcd + +/(?<=a(*SKIP)b)c/ + xabcd + +/(?<=a(*THEN)b)c/ + xabcd + /-- End of testinput11 --/ diff --git a/testdata/testoutput1 b/testdata/testoutput1 index 5a025e2..0c2e84e 100644 --- a/testdata/testoutput1 +++ b/testdata/testoutput1 @@ -6968,4 +6968,40 @@ No match xaabc No match +/(?<=a\h)c/ + xa c + 0: c + +/(?<=[^a]{2})b/ + axxbc + 0: b + aAAbc + 0: b + ** Failers +No match + xaabc +No match + +/(?<=[^a]{2})b/i + axxbc + 0: b + ** Failers +No match + aAAbc +No match + xaabc +No match + +/(?<=a\H)c/ + abc + 0: c + +/(?<=a\V)c/ + abc + 0: c + +/(?<=a\v)c/ + a\nc + 0: c + /-- End of testinput1 --/ diff --git a/testdata/testoutput11 b/testdata/testoutput11 index e3df0eb..7fc086f 100644 --- a/testdata/testoutput11 +++ b/testdata/testoutput11 @@ -1400,4 +1400,45 @@ MK: m 1: a MK: m +/(?<=a(*ACCEPT)b)c/ + xacd + 0: c + +/(?<=(a(*ACCEPT)b))c/ + xacd + 0: c + 1: a + +/(?<=(a(*COMMIT)b))c/ + xabcd + 0: c + 1: ab + ** Failers +No match + xacd +No match + +/(?<!a(*FAIL)b)c/ + xcd + 0: c + acd + 0: c + +/(?<=a(*:N)b)c/K + xabcd + 0: c +MK: N + +/(?<=a(*PRUNE)b)c/ + xabcd + 0: c + +/(?<=a(*SKIP)b)c/ + xabcd + 0: c + +/(?<=a(*THEN)b)c/ + xabcd + 0: c + /-- End of testinput11 --/ |