diff options
author | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-03-09 12:23:37 +0000 |
---|---|---|
committer | ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> | 2007-03-09 12:23:37 +0000 |
commit | e5fdca3902baa3216086381cdcd6d84d2808b9b1 (patch) | |
tree | 2061236a55ccaeff54a0b4bc4c7ad7d0b98495b9 | |
parent | 2c2f5db4584bc152186271aed3ec4a2fe7c9e322 (diff) | |
download | pcre-e5fdca3902baa3216086381cdcd6d84d2808b9b1.tar.gz |
Fix two obscure, but nasty bugs concerned with caseless matching with Unicode
property support.
git-svn-id: svn://vcs.exim.org/pcre/code/trunk@115 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- | ChangeLog | 17 | ||||
-rw-r--r-- | pcre_exec.c | 27 | ||||
-rw-r--r-- | testdata/testinput6 | 10 | ||||
-rw-r--r-- | testdata/testinput9 | 6 | ||||
-rw-r--r-- | testdata/testoutput6 | 14 | ||||
-rw-r--r-- | testdata/testoutput9 | 8 |
6 files changed, 79 insertions, 3 deletions
@@ -70,6 +70,23 @@ Version 7.1 05-Mar-07
     pcre.h.generic and config.h.generic from pcre.h and config.h. In the latter
     case, it wraps all the #defines with #ifndefs. This script should be run
     before "make dist".
+
+12. Fixed two fairly obscure bugs concerned with quantified caseless matching
+    with Unicode property support.
+
+    (a) For a maximizing quantifier, if the two different cases of the
+        character were of different lengths in their UTF-8 codings (there are
+        some cases like this - I found 11), and the matching function had to
+        back up over a mixture of the two cases, it incorrectly assumed they
+        were both the same length.
+
+    (b) When PCRE was configured to use the heap rather than the stack for
+        recursion during matching, it was not correctly preserving the data for
+        the other case of a UTF-8 character when checking ahead for a match
+        while processing a minimizing repeat. If the check also involved
+        matching a wide character, but failed, corruption could cause an
+        erroneous result when trying to check for a repeat of the original
+        character.
 Version 7.0 19-Dec-06
diff --git a/pcre_exec.c b/pcre_exec.c
index 8156fd2..0d80f2e 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -299,6 +299,8 @@ typedef struct heapframe {
   int Xprop_category;
   int Xprop_chartype;
   int Xprop_script;
+  int Xoclength;
+  uschar Xocchars[8];
 #endif

   int Xctype;
@@ -441,6 +443,8 @@ HEAP_RECURSE:
 #define prop_category      frame->Xprop_category
 #define prop_chartype      frame->Xprop_chartype
 #define prop_script        frame->Xprop_script
+#define oclength           frame->Xoclength
+#define occhars            frame->Xocchars
 #endif

 #define ctype              frame->Xctype
@@ -494,6 +498,8 @@ int prop_fail_result;
 int prop_category;
 int prop_chartype;
 int prop_script;
+int oclength;
+uschar occhars[8];
 #endif

 int ctype;
@@ -2045,19 +2051,18 @@ for (;;)

     if (length > 1)
       {
-      int oclength = 0;
-      uschar occhars[8];
-
 #ifdef SUPPORT_UCP
       unsigned int othercase;
       if ((ims & PCRE_CASELESS) != 0 &&
           (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
         oclength = _pcre_ord2utf8(othercase, occhars);
+      else oclength = 0;
 #endif  /* SUPPORT_UCP */

       for (i = 1; i <= min; i++)
         {
         if (memcmp(eptr, charptr, length) == 0) eptr += length;
+#ifdef SUPPORT_UCP
         /* Need braces because of following else */
         else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
         else
@@ -2065,6 +2070,9 @@ for (;;)
           if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
           eptr += oclength;
           }
+#else   /* without SUPPORT_UCP */
+        else { RRETURN(MATCH_NOMATCH); }
+#endif  /* SUPPORT_UCP */
         }

       if (min == max) continue;
@@ -2077,6 +2085,7 @@ for (;;)
           if (rrc != MATCH_NOMATCH) RRETURN(rrc);
           if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
           if (memcmp(eptr, charptr, length) == 0) eptr += length;
+#ifdef SUPPORT_UCP
           /* Need braces because of following else */
           else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
           else
@@ -2084,6 +2093,9 @@ for (;;)
             if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
             eptr += oclength;
             }
+#else   /* without SUPPORT_UCP */
+          else { RRETURN (MATCH_NOMATCH); }
+#endif  /* SUPPORT_UCP */
           }
         /* Control never gets here */
         }
@@ -2095,12 +2107,16 @@ for (;;)
           {
           if (eptr > md->end_subject - length) break;
           if (memcmp(eptr, charptr, length) == 0) eptr += length;
+#ifdef SUPPORT_UCP
           else if (oclength == 0) break;
           else
             {
             if (memcmp(eptr, occhars, oclength) != 0) break;
             eptr += oclength;
             }
+#else   /* without SUPPORT_UCP */
+          else break;
+#endif  /* SUPPORT_UCP */
           }

         if (possessive) continue;
@@ -2108,7 +2124,12 @@ for (;;)
          {
          RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
          if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+#ifdef SUPPORT_UCP
+         eptr--;
+         BACKCHAR(eptr);
+#else   /* without SUPPORT_UCP */
          eptr -= length;
+#endif  /* SUPPORT_UCP */
          }
         RRETURN(MATCH_NOMATCH);
         }
diff --git a/testdata/testinput6 b/testdata/testinput6
index 5a541f3..a0de1f1 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -762,4 +762,14 @@ of case for anything other than the ASCII letters. /
 /^\p{Balinese}\p{Cuneiform}\p{Nko}\p{Phags_Pa}\p{Phoenician}/8
     \x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}

+/The next two are special cases where the lengths of the different cases of the
+same character differ. The first went wrong with heap fram storage; the 2nd
+was broken in all cases./
+
+/^\x{023a}+?(\x{0130}+)/8i
+    \x{023a}\x{2c65}\x{0130}
+
+/^\x{023a}+([^X])/8i
+    \x{023a}\x{2c65}X
+
 / End of testinput6 /
diff --git a/testdata/testinput9 b/testdata/testinput9
index 23d3d3e..e11dda1 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -813,4 +813,10 @@
     \x{1c5}XY
     AXY

+/^\x{023a}+?(\x{0130}+)/8i
+    \x{023a}\x{2c65}\x{0130}
+
+/^\x{023a}+([^X])/8i
+    \x{023a}\x{2c65}X
+
 / End /
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index a8bb0ad..3b907a4 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1432,4 +1432,18 @@ of case for anything other than the ASCII letters. /
     \x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
  0: \x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}

+/The next two are special cases where the lengths of the different cases of the
+same character differ. The first went wrong with heap fram storage; the 2nd
+was broken in all cases./
+
+/^\x{023a}+?(\x{0130}+)/8i
+    \x{023a}\x{2c65}\x{0130}
+ 0: \x{23a}\x{2c65}\x{130}
+ 1: \x{130}
+
+/^\x{023a}+([^X])/8i
+    \x{023a}\x{2c65}X
+ 0: \x{23a}\x{2c65}
+ 1: \x{2c65}
+
 / End of testinput6 /
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 000e2b9..2cc61ac 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -1624,4 +1624,12 @@ No match
     AXY
 No match

+/^\x{023a}+?(\x{0130}+)/8i
+    \x{023a}\x{2c65}\x{0130}
+ 0: \x{23a}\x{2c65}\x{130}
+
+/^\x{023a}+([^X])/8i
+    \x{023a}\x{2c65}X
+ 0: \x{23a}\x{2c65}
+
 / End /
|