diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-04-20 17:28:23 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2012-04-20 17:28:23 +0000 |
commit | 0ccf556bc0830fdc09d32d52ae317a95fe631ca9 (patch) | |
tree | dea25f018b65a3c9c8ae3cb8e6eff14d1f8ae57e | |
parent | 7a2c66fafd71e7d5cd43ba0cc5648531d3696227 (diff) | |
download | pcre-0ccf556bc0830fdc09d32d52ae317a95fe631ca9.tar.gz |
Fix auto-possessifying bugs when PCRE_UCP is not set but the character tables
specify that characters in the range 128-255 are letters, spaces, etc.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@962 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 35 | ||||
-rw-r--r-- | pcre_compile.c | 24 | ||||
-rw-r--r-- | testdata/testinput15 | 36 | ||||
-rw-r--r-- | testdata/testinput18 | 36 | ||||
-rw-r--r-- | testdata/testoutput15 | 136 | ||||
-rw-r--r-- | testdata/testoutput18 | 136 |
6 files changed, 376 insertions, 27 deletions
@@ -9,7 +9,7 @@ Version 8.31 2. Removed a bashism from the RunTest script. 3. Add a cast to pcre_exec.c to fix the warning "unary minus operator applied - to unsigned type, result still unsigned" that was given by an MS compiler + to unsigned type, result still unsigned" that was given by an MS compiler on encountering the code "-sizeof(xxx)". 4. Partial matching support is added to the JIT compiler. @@ -18,13 +18,13 @@ Version 8.31 of more than one character: (a) /^(..)\1/ did not partially match "aba" because checking references was - done on an "all or nothing" basis. This also applied to repeated + done on an "all or nothing" basis. This also applied to repeated references. (b) \R did not give a hard partial match if \r was found at the end of the subject. - (c) \X did not give a hard partial match after matching one or more + (c) \X did not give a hard partial match after matching one or more characters at the end of the subject. (d) When newline was set to CRLF, a pattern such as /a$/ did not recognize @@ -33,11 +33,11 @@ Version 8.31 (e) When newline was set to CRLF, the metacharacter "." did not recognize a partial match for a CR character at the end of the subject string. -6. If JIT is requested using /S++ or -s++ (instead of just /S+ or -s+) when - running pcretest, the text "(JIT)" added to the output whenever JIT is +6. If JIT is requested using /S++ or -s++ (instead of just /S+ or -s+) when + running pcretest, the text "(JIT)" added to the output whenever JIT is actually used to run the match. -7. Individual JIT compile options can be set in pcretest by following -s+[+] +7. Individual JIT compile options can be set in pcretest by following -s+[+] or /S+[+] with a digit between 1 and 7. 8. OP_NOT now supports any UTF character not just single-byte ones. @@ -46,21 +46,21 @@ Version 8.31 10. The command "./RunTest list" lists the available tests without actually running any of them. (Because I keep forgetting what they all are.) - -11. 
Add PCRE_INFO_MAXLOOKBEHIND. + +11. Add PCRE_INFO_MAXLOOKBEHIND. 12. Applied a (slightly modified) user-supplied patch that improves performance when the heap is used for recursion (compiled with --disable-stack-for- - recursion). Instead of malloc and free for each heap frame each time a - logical recursion happens, frames are retained on a chain and re-used where - possible. This sometimes gives as much as 30% improvement. - + recursion). Instead of malloc and free for each heap frame each time a + logical recursion happens, frames are retained on a chain and re-used where + possible. This sometimes gives as much as 30% improvement. + 13. As documented, (*COMMIT) is now confined to within a recursive subpattern call. 14. As documented, (*COMMIT) is now confined to within a positive assertion. -15. It is now possible to link pcretest with libedit as an alternative to +15. It is now possible to link pcretest with libedit as an alternative to libreadline. 16. (*COMMIT) control verb is now supported by the JIT compiler. @@ -86,10 +86,15 @@ Version 8.31 matches in certain environments (the workspace was not being correctly retained). Also added to pcre_dfa_exec() a simple plausibility check on some of the workspace data at the beginning of a restart. - + 25. \s*\R was auto-possessifying the \s* when it should not, whereas \S*\R was not doing so when it should - probably a typo introduced by SVN 528 - (change 8.10/14). + (change 8.10/14). + +26. When PCRE_UCP was not set, \w+\x{c4} was incorrectly auto-possessifying the + \w+ when the character tables indicated that \x{c4} was a word character. + There were several related cases, all because the tests for doing a table + lookup were testing for characters less than 127 instead of 255. Version 8.30 04-February-2012 diff --git a/pcre_compile.c b/pcre_compile.c index b267f31..07b8a00 100644 --- a/pcre_compile.c +++ b/pcre_compile.c @@ -3132,22 +3132,22 @@ if (next >= 0) switch(op_code) When it is set, \d etc. 
are converted into OP_(NOT_)PROP codes. */ case OP_DIGIT: - return next > 127 || (cd->ctypes[next] & ctype_digit) == 0; + return next > 255 || (cd->ctypes[next] & ctype_digit) == 0; case OP_NOT_DIGIT: - return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0; case OP_WHITESPACE: - return next > 127 || (cd->ctypes[next] & ctype_space) == 0; + return next > 255 || (cd->ctypes[next] & ctype_space) == 0; case OP_NOT_WHITESPACE: - return next <= 127 && (cd->ctypes[next] & ctype_space) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_space) != 0; case OP_WORDCHAR: - return next > 127 || (cd->ctypes[next] & ctype_word) == 0; + return next > 255 || (cd->ctypes[next] & ctype_word) == 0; case OP_NOT_WORDCHAR: - return next <= 127 && (cd->ctypes[next] & ctype_word) != 0; + return next <= 255 && (cd->ctypes[next] & ctype_word) != 0; case OP_HSPACE: case OP_NOT_HSPACE: @@ -3225,22 +3225,22 @@ switch(op_code) switch(-next) { case ESC_d: - return c > 127 || (cd->ctypes[c] & ctype_digit) == 0; + return c > 255 || (cd->ctypes[c] & ctype_digit) == 0; case ESC_D: - return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0; case ESC_s: - return c > 127 || (cd->ctypes[c] & ctype_space) == 0; + return c > 255 || (cd->ctypes[c] & ctype_space) == 0; case ESC_S: - return c <= 127 && (cd->ctypes[c] & ctype_space) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_space) != 0; case ESC_w: - return c > 127 || (cd->ctypes[c] & ctype_word) == 0; + return c > 255 || (cd->ctypes[c] & ctype_word) == 0; case ESC_W: - return c <= 127 && (cd->ctypes[c] & ctype_word) != 0; + return c <= 255 && (cd->ctypes[c] & ctype_word) != 0; case ESC_h: case ESC_H: diff --git a/testdata/testinput15 b/testdata/testinput15 index f995646..48e1b03 100644 --- a/testdata/testinput15 +++ b/testdata/testinput15 @@ -277,4 +277,40 @@ correctly, but that messes up comparisons). 
--/ /\777/8DZ +/\w+\x{C4}/8BZ + a\x{C4}\x{C4} + +/\w+\x{C4}/8BZT1 + a\x{C4}\x{C4} + +/\W+\x{C4}/8BZ + !\x{C4} + +/\W+\x{C4}/8BZT1 + !\x{C4} + +/\W+\x{A1}/8BZ + !\x{A1} + +/\W+\x{A1}/8BZT1 + !\x{A1} + +/X\s+\x{A0}/8BZ + X\x20\x{A0}\x{A0} + +/X\s+\x{A0}/8BZT1 + X\x20\x{A0}\x{A0} + +/\S+\x{A0}/8BZ + X\x{A0}\x{A0} + +/\S+\x{A0}/8BZT1 + X\x{A0}\x{A0} + +/\x{a0}+\s!/8BZ + \x{a0}\x20! + +/\x{a0}+\s!/8BZT1 + \x{a0}\x20! + /-- End of testinput15 --/ diff --git a/testdata/testinput18 b/testdata/testinput18 index e6c77a8..d0e46dd 100644 --- a/testdata/testinput18 +++ b/testdata/testinput18 @@ -240,4 +240,40 @@ correctly, but that messes up comparisons). --/ /νΌ€/8 +/\w+\x{C4}/8BZ + a\x{C4}\x{C4} + +/\w+\x{C4}/8BZT1 + a\x{C4}\x{C4} + +/\W+\x{C4}/8BZ + !\x{C4} + +/\W+\x{C4}/8BZT1 + !\x{C4} + +/\W+\x{A1}/8BZ + !\x{A1} + +/\W+\x{A1}/8BZT1 + !\x{A1} + +/X\s+\x{A0}/8BZ + X\x20\x{A0}\x{A0} + +/X\s+\x{A0}/8BZT1 + X\x20\x{A0}\x{A0} + +/\S+\x{A0}/8BZ + X\x{A0}\x{A0} + +/\S+\x{A0}/8BZT1 + X\x{A0}\x{A0} + +/\x{a0}+\s!/8BZ + \x{a0}\x20! + +/\x{a0}+\s!/8BZT1 + \x{a0}\x20! 
+ /-- End of testinput18 --/ diff --git a/testdata/testoutput15 b/testdata/testoutput15 index c965939..b145ea6 100644 --- a/testdata/testoutput15 +++ b/testdata/testoutput15 @@ -910,4 +910,140 @@ Options: utf First char = \x{c7} Need char = \x{bf} +/\w+\x{C4}/8BZ +------------------------------------------------------------------ + Bra + \w++ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4} + +/\w+\x{C4}/8BZT1 +------------------------------------------------------------------ + Bra + \w+ + \x{c4} + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4}\x{c4} + +/\W+\x{C4}/8BZ +------------------------------------------------------------------ + Bra + \W+ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{C4}/8BZT1 +------------------------------------------------------------------ + Bra + \W++ + \x{c4} + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{A1}/8BZ +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/\W+\x{A1}/8BZT1 +------------------------------------------------------------------ + Bra + \W+ + \x{a1} + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/X\s+\x{A0}/8BZ +------------------------------------------------------------------ + Bra + X + \s++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0} + +/X\s+\x{A0}/8BZT1 +------------------------------------------------------------------ + Bra + X + \s+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0}\x{a0} + 
+/\S+\x{A0}/8BZ +------------------------------------------------------------------ + Bra + \S+ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0}\x{a0} + +/\S+\x{A0}/8BZT1 +------------------------------------------------------------------ + Bra + \S++ + \x{a0} + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0} + +/\x{a0}+\s!/8BZ +------------------------------------------------------------------ + Bra + \x{a0}++ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + +/\x{a0}+\s!/8BZT1 +------------------------------------------------------------------ + Bra + \x{a0}+ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + /-- End of testinput15 --/ diff --git a/testdata/testoutput18 b/testdata/testoutput18 index a129854..adc3ab4 100644 --- a/testdata/testoutput18 +++ b/testdata/testoutput18 @@ -845,4 +845,140 @@ Error -24 (bad offset value) /νΌ€/8 Failed: invalid UTF-16 string at offset 0 +/\w+\x{C4}/8BZ +------------------------------------------------------------------ + Bra + \w++ + \xc4 + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4} + +/\w+\x{C4}/8BZT1 +------------------------------------------------------------------ + Bra + \w+ + \xc4 + Ket + End +------------------------------------------------------------------ + a\x{C4}\x{C4} + 0: a\x{c4}\x{c4} + +/\W+\x{C4}/8BZ +------------------------------------------------------------------ + Bra + \W+ + \xc4 + Ket + End +------------------------------------------------------------------ + !\x{C4} + 0: !\x{c4} + +/\W+\x{C4}/8BZT1 +------------------------------------------------------------------ + Bra + \W++ + \xc4 + Ket + End +------------------------------------------------------------------ + !\x{C4} + 
0: !\x{c4} + +/\W+\x{A1}/8BZ +------------------------------------------------------------------ + Bra + \W+ + \xa1 + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/\W+\x{A1}/8BZT1 +------------------------------------------------------------------ + Bra + \W+ + \xa1 + Ket + End +------------------------------------------------------------------ + !\x{A1} + 0: !\x{a1} + +/X\s+\x{A0}/8BZ +------------------------------------------------------------------ + Bra + X + \s++ + \xa0 + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0} + +/X\s+\x{A0}/8BZT1 +------------------------------------------------------------------ + Bra + X + \s+ + \xa0 + Ket + End +------------------------------------------------------------------ + X\x20\x{A0}\x{A0} + 0: X \x{a0}\x{a0} + +/\S+\x{A0}/8BZ +------------------------------------------------------------------ + Bra + \S+ + \xa0 + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0}\x{a0} + +/\S+\x{A0}/8BZT1 +------------------------------------------------------------------ + Bra + \S++ + \xa0 + Ket + End +------------------------------------------------------------------ + X\x{A0}\x{A0} + 0: X\x{a0} + +/\x{a0}+\s!/8BZ +------------------------------------------------------------------ + Bra + \xa0++ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + +/\x{a0}+\s!/8BZT1 +------------------------------------------------------------------ + Bra + \xa0+ + \s + ! + Ket + End +------------------------------------------------------------------ + \x{a0}\x20! + 0: \x{a0} ! + /-- End of testinput18 --/ |