summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgi Kodinov <Georgi.Kodinov@Oracle.com>2011-07-22 15:54:47 +0300
committerGeorgi Kodinov <Georgi.Kodinov@Oracle.com>2011-07-22 15:54:47 +0300
commit1a9b512d025e3b55c18feaee6afd67297313bb00 (patch)
treecbb0e1350aed9497cec1f75d21e26d289902dcc6
parente25a282970c44b36ab0bb9c442df68a2a7fefc06 (diff)
downloadmariadb-git-1a9b512d025e3b55c18feaee6afd67297313bb00.tar.gz
Bug #12319710: INVALID MEMORY READ AND/OR CRASH IN MY_UCA_CHARCMP
WITH UTF32 The 5.5 version of the UTF32 collation was not enforcing the BMP range that it currently supports when comparing with LIKE. Fixed by backporting the checks for the BMP from trunk. Added a named constant for the maximum character that can have a weight in the weight table.
-rw-r--r--mysql-test/r/ctype_utf32_uca.result20
-rw-r--r--mysql-test/t/ctype_utf32_uca.test21
-rw-r--r--strings/ctype-uca.c54
3 files changed, 86 insertions, 9 deletions
diff --git a/mysql-test/r/ctype_utf32_uca.result b/mysql-test/r/ctype_utf32_uca.result
index fd5a4199217..a9f056b2e44 100644
--- a/mysql-test/r/ctype_utf32_uca.result
+++ b/mysql-test/r/ctype_utf32_uca.result
@@ -2415,5 +2415,25 @@ HEX(s1)
00000061
DROP TABLE t1;
#
+# Bug #12319710 : INVALID MEMORY READ AND/OR CRASH IN
+# MY_UCA_CHARCMP WITH UTF32
+#
+SET collation_connection=utf32_unicode_ci;
+CREATE TABLE t1 (a TEXT CHARACTER SET utf32 COLLATE utf32_turkish_ci NOT NULL);
+INSERT INTO t1 VALUES ('a'), ('b');
+CREATE TABLE t2 (b VARBINARY(5) NOT NULL);
+#insert chars outside of BMP
+INSERT INTO t2 VALUEs (0x082837),(0x082837);
+#test for read-out-of-bounds with non-BMP chars as a LIKE pattern
+SELECT * FROM t1,t2 WHERE a LIKE b;
+a b
+#test the original statement
+SELECT 1 FROM t1 AS t1_0 NATURAL LEFT OUTER JOIN t2 AS t2_0
+RIGHT JOIN t1 AS t1_1 ON t1_0.a LIKE t2_0.b;
+1
+1
+1
+DROP TABLE t1,t2;
+#
# End of 5.5 tests
#
diff --git a/mysql-test/t/ctype_utf32_uca.test b/mysql-test/t/ctype_utf32_uca.test
index a62ffbf95c7..1d79fbe1616 100644
--- a/mysql-test/t/ctype_utf32_uca.test
+++ b/mysql-test/t/ctype_utf32_uca.test
@@ -293,6 +293,27 @@ SET collation_connection=utf32_czech_ci;
--source include/ctype_czech.inc
--source include/ctype_like_ignorable.inc
+--echo #
+--echo # Bug #12319710 : INVALID MEMORY READ AND/OR CRASH IN
+--echo # MY_UCA_CHARCMP WITH UTF32
+--echo #
+
+SET collation_connection=utf32_unicode_ci;
+CREATE TABLE t1 (a TEXT CHARACTER SET utf32 COLLATE utf32_turkish_ci NOT NULL);
+INSERT INTO t1 VALUES ('a'), ('b');
+CREATE TABLE t2 (b VARBINARY(5) NOT NULL);
+
+--echo #insert chars outside of BMP
+INSERT INTO t2 VALUEs (0x082837),(0x082837);
+
+--echo #test for read-out-of-bounds with non-BMP chars as a LIKE pattern
+SELECT * FROM t1,t2 WHERE a LIKE b;
+
+--echo #test the original statement
+SELECT 1 FROM t1 AS t1_0 NATURAL LEFT OUTER JOIN t2 AS t2_0
+RIGHT JOIN t1 AS t1_1 ON t1_0.a LIKE t2_0.b;
+
+DROP TABLE t1,t2;
--echo #
--echo # End of 5.5 tests
diff --git a/strings/ctype-uca.c b/strings/ctype-uca.c
index d6be395535c..70d2df3bab9 100644
--- a/strings/ctype-uca.c
+++ b/strings/ctype-uca.c
@@ -42,6 +42,7 @@
#define MY_UCA_NCHARS 256
#define MY_UCA_CMASK 255
#define MY_UCA_PSHIFT 8
+#define MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT 0xFFFF
uint16 page000data[]= { /* 0000 (4 weights per char) */
0x0000,0x0000,0x0000,0x0000, 0x0000,0x0000,0x0000,0x0000,
@@ -6984,7 +6985,7 @@ static int my_uca_scanner_next_any(my_uca_scanner *scanner)
return -1;
scanner->sbeg+= mb_len;
- if (wc > 0xFFFF)
+ if (wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
{
/* Return 0xFFFD as weight for all characters outside BMP */
scanner->wbeg= nochar;
@@ -7322,6 +7323,33 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
+/**
+ Helper function:
+ Find address of weights of the given character.
+
+ @param weights UCA weight array
+ @param lengths UCA length array
+ @param ch character Unicode code point
+
+ @return Weight array
+ @retval pointer to weight array for the given character,
+ or NULL if this page does not have implicit weights.
+*/
+
+static inline uint16 *
+my_char_weight_addr(CHARSET_INFO *cs, uint wc)
+{
+ uint page, ofst;
+ uchar *ucal= cs->sort_order;
+ uint16 **ucaw= cs->sort_order_big;
+
+ return wc > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ? NULL :
+ (ucaw[page= (wc >> 8)] ?
+ ucaw[page] + (ofst= (wc & 0xFF)) * ucal[page] :
+ NULL);
+}
+
+
/*
This function compares if two characters are the same.
The sign +1 or -1 does not matter. The only
@@ -7332,17 +7360,20 @@ static size_t my_strnxfrm_uca(CHARSET_INFO *cs,
static int my_uca_charcmp(CHARSET_INFO *cs, my_wc_t wc1, my_wc_t wc2)
{
- size_t page1= wc1 >> MY_UCA_PSHIFT;
- size_t page2= wc2 >> MY_UCA_PSHIFT;
- uchar *ucal= cs->sort_order;
- uint16 **ucaw= cs->sort_order_big;
- size_t length1= ucal[page1];
- size_t length2= ucal[page2];
- uint16 *weight1= ucaw[page1] + (wc1 & MY_UCA_CMASK) * ucal[page1];
- uint16 *weight2= ucaw[page2] + (wc2 & MY_UCA_CMASK) * ucal[page2];
+ size_t length1, length2;
+ uint16 *weight1= my_char_weight_addr(cs, wc1);
+ uint16 *weight2= my_char_weight_addr(cs, wc2);
if (!weight1 || !weight2)
return wc1 != wc2;
+
+ /* Quickly compare first weights */
+ if (weight1[0] != weight2[0])
+ return 1;
+
+ /* Thoroughly compare all weights */
+ length1= cs->sort_order[wc1 >> MY_UCA_PSHIFT];
+ length2= cs->sort_order[wc2 >> MY_UCA_PSHIFT];
if (length1 > length2)
return memcmp((const void*)weight1, (const void*)weight2, length2*2) ?
@@ -7924,6 +7955,11 @@ static my_bool create_tailoring(CHARSET_INFO *cs, void *(*alloc)(size_t))
*/
for (i=0; i < rc; i++)
{
+ /* check if the shift or the reset characters are out of range */
+ if (rule[i].curr[0] > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT ||
+ rule[i].base > MAX_UCA_CHAR_WITH_EXPLICIT_WEIGHT)
+ return 1;
+
if (!rule[i].curr[1]) /* If not a contraction */
{
uint pageb= (rule[i].base >> 8) & 0xFF;