summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-03-09 12:23:37 +0000
committer ph10 <ph10@2f5784b3-3f2a-0410-8824-cb99058d5e15> 2007-03-09 12:23:37 +0000
commit e5fdca3902baa3216086381cdcd6d84d2808b9b1 (patch)
tree 2061236a55ccaeff54a0b4bc4c7ad7d0b98495b9
parent 2c2f5db4584bc152186271aed3ec4a2fe7c9e322 (diff)
download pcre-e5fdca3902baa3216086381cdcd6d84d2808b9b1.tar.gz
Fix two obscure, but nasty bugs concerned with caseless matching with Unicode
property support.

git-svn-id: svn://vcs.exim.org/pcre/code/trunk@115 2f5784b3-3f2a-0410-8824-cb99058d5e15
-rw-r--r-- ChangeLog 17
-rw-r--r-- pcre_exec.c 27
-rw-r--r-- testdata/testinput6 10
-rw-r--r-- testdata/testinput9 6
-rw-r--r-- testdata/testoutput6 14
-rw-r--r-- testdata/testoutput9 8
6 files changed, 79 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index cb2632e..b1adf50 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -70,6 +70,23 @@ Version 7.1 05-Mar-07
pcre.h.generic and config.h.generic from pcre.h and config.h. In the latter
case, it wraps all the #defines with #ifndefs. This script should be run
before "make dist".
+
+12. Fixed two fairly obscure bugs concerned with quantified caseless matching
+ with Unicode property support.
+
+ (a) For a maximizing quantifier, if the two different cases of the
+ character were of different lengths in their UTF-8 codings (there are
+ some cases like this - I found 11), and the matching function had to
+ back up over a mixture of the two cases, it incorrectly assumed they
+ were both the same length.
+
+ (b) When PCRE was configured to use the heap rather than the stack for
+ recursion during matching, it was not correctly preserving the data for
+ the other case of a UTF-8 character when checking ahead for a match
+ while processing a minimizing repeat. If the check also involved
+ matching a wide character, but failed, corruption could cause an
+ erroneous result when trying to check for a repeat of the original
+ character.
Version 7.0 19-Dec-06
diff --git a/pcre_exec.c b/pcre_exec.c
index 8156fd2..0d80f2e 100644
--- a/pcre_exec.c
+++ b/pcre_exec.c
@@ -299,6 +299,8 @@ typedef struct heapframe {
int Xprop_category;
int Xprop_chartype;
int Xprop_script;
+ int Xoclength;
+ uschar Xocchars[8];
#endif
int Xctype;
@@ -441,6 +443,8 @@ HEAP_RECURSE:
#define prop_category frame->Xprop_category
#define prop_chartype frame->Xprop_chartype
#define prop_script frame->Xprop_script
+#define oclength frame->Xoclength
+#define occhars frame->Xocchars
#endif
#define ctype frame->Xctype
@@ -494,6 +498,8 @@ int prop_fail_result;
int prop_category;
int prop_chartype;
int prop_script;
+int oclength;
+uschar occhars[8];
#endif
int ctype;
@@ -2045,19 +2051,18 @@ for (;;)
if (length > 1)
{
- int oclength = 0;
- uschar occhars[8];
-
#ifdef SUPPORT_UCP
unsigned int othercase;
if ((ims & PCRE_CASELESS) != 0 &&
(othercase = _pcre_ucp_othercase(fc)) != NOTACHAR)
oclength = _pcre_ord2utf8(othercase, occhars);
+ else oclength = 0;
#endif /* SUPPORT_UCP */
for (i = 1; i <= min; i++)
{
if (memcmp(eptr, charptr, length) == 0) eptr += length;
+#ifdef SUPPORT_UCP
/* Need braces because of following else */
else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
else
@@ -2065,6 +2070,9 @@ for (;;)
if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
eptr += oclength;
}
+#else /* without SUPPORT_UCP */
+ else { RRETURN(MATCH_NOMATCH); }
+#endif /* SUPPORT_UCP */
}
if (min == max) continue;
@@ -2077,6 +2085,7 @@ for (;;)
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH);
if (memcmp(eptr, charptr, length) == 0) eptr += length;
+#ifdef SUPPORT_UCP
/* Need braces because of following else */
else if (oclength == 0) { RRETURN(MATCH_NOMATCH); }
else
@@ -2084,6 +2093,9 @@ for (;;)
if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH);
eptr += oclength;
}
+#else /* without SUPPORT_UCP */
+ else { RRETURN (MATCH_NOMATCH); }
+#endif /* SUPPORT_UCP */
}
/* Control never gets here */
}
@@ -2095,12 +2107,16 @@ for (;;)
{
if (eptr > md->end_subject - length) break;
if (memcmp(eptr, charptr, length) == 0) eptr += length;
+#ifdef SUPPORT_UCP
else if (oclength == 0) break;
else
{
if (memcmp(eptr, occhars, oclength) != 0) break;
eptr += oclength;
}
+#else /* without SUPPORT_UCP */
+ else break;
+#endif /* SUPPORT_UCP */
}
if (possessive) continue;
@@ -2108,7 +2124,12 @@ for (;;)
{
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0);
if (rrc != MATCH_NOMATCH) RRETURN(rrc);
+#ifdef SUPPORT_UCP
+ eptr--;
+ BACKCHAR(eptr);
+#else /* without SUPPORT_UCP */
eptr -= length;
+#endif /* SUPPORT_UCP */
}
RRETURN(MATCH_NOMATCH);
}
diff --git a/testdata/testinput6 b/testdata/testinput6
index 5a541f3..a0de1f1 100644
--- a/testdata/testinput6
+++ b/testdata/testinput6
@@ -762,4 +762,14 @@ of case for anything other than the ASCII letters. /
/^\p{Balinese}\p{Cuneiform}\p{Nko}\p{Phags_Pa}\p{Phoenician}/8
\x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
+/The next two are special cases where the lengths of the different cases of the
+same character differ. The first went wrong with heap fram storage; the 2nd
+was broken in all cases./
+
+/^\x{023a}+?(\x{0130}+)/8i
+ \x{023a}\x{2c65}\x{0130}
+
+/^\x{023a}+([^X])/8i
+ \x{023a}\x{2c65}X
+
/ End of testinput6 /
diff --git a/testdata/testinput9 b/testdata/testinput9
index 23d3d3e..e11dda1 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -813,4 +813,10 @@
\x{1c5}XY
AXY
+/^\x{023a}+?(\x{0130}+)/8i
+ \x{023a}\x{2c65}\x{0130}
+
+/^\x{023a}+([^X])/8i
+ \x{023a}\x{2c65}X
+
/ End /
diff --git a/testdata/testoutput6 b/testdata/testoutput6
index a8bb0ad..3b907a4 100644
--- a/testdata/testoutput6
+++ b/testdata/testoutput6
@@ -1432,4 +1432,18 @@ of case for anything other than the ASCII letters. /
\x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
0: \x{1b00}\x{12000}\x{7c0}\x{a840}\x{10900}
+/The next two are special cases where the lengths of the different cases of the
+same character differ. The first went wrong with heap fram storage; the 2nd
+was broken in all cases./
+
+/^\x{023a}+?(\x{0130}+)/8i
+ \x{023a}\x{2c65}\x{0130}
+ 0: \x{23a}\x{2c65}\x{130}
+ 1: \x{130}
+
+/^\x{023a}+([^X])/8i
+ \x{023a}\x{2c65}X
+ 0: \x{23a}\x{2c65}
+ 1: \x{2c65}
+
/ End of testinput6 /
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 000e2b9..2cc61ac 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -1624,4 +1624,12 @@ No match
AXY
No match
+/^\x{023a}+?(\x{0130}+)/8i
+ \x{023a}\x{2c65}\x{0130}
+ 0: \x{23a}\x{2c65}\x{130}
+
+/^\x{023a}+([^X])/8i
+ \x{023a}\x{2c65}X
+ 0: \x{23a}\x{2c65}
+
/ End /