summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichael Widenius <monty@askmonty.org>2009-12-03 14:02:37 +0200
committerMichael Widenius <monty@askmonty.org>2009-12-03 14:02:37 +0200
commit626dd5e81a87f2eefecc49f7a140708062d7fde4 (patch)
tree100655a3b097525c21bc6c8101634b71e408ee64
parent65ca2521f18b0b3400d8b45edae30baf2dbd6401 (diff)
downloadmariadb-git-626dd5e81a87f2eefecc49f7a140708062d7fde4.tar.gz
Applied patch from to fix some problems with Croatian character set and LIKE queries
Author: Alexander Barkov License: GPL mysql-test/t/ctype_ucs.test: Added test case for Croatina character set
-rw-r--r--mysql-test/r/ctype_ucs.result44
-rw-r--r--mysql-test/t/ctype_ucs.test31
-rw-r--r--strings/ctype-ucs2.c18
3 files changed, 88 insertions, 5 deletions
diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
index 428629e7e9e..c14422470ca 100644
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -1211,3 +1211,47 @@ HEX(DAYNAME(19700101))
0427043504420432043504400433
SET character_set_connection=latin1;
End of 5.0 tests
+Start of 5.1 tests
+SET NAMES utf8;
+CREATE TABLE t1 (
+a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 30 Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'c%';
+a
+ca
+cc
+cz
+ch
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+a
+ch
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+hex(concat('d',_ucs2 0x017E,'%'))
+0064017E0025
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t1 range a a 23 NULL 1 Using where; Using index
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+hex(a)
+0064017E
+DROP TABLE t1;
diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
index e247110658b..be1ca63d9e9 100644
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -723,3 +723,34 @@ SELECT HEX(DAYNAME(19700101));
SET character_set_connection=latin1;
--echo End of 5.0 tests
+
+
+--echo Start of 5.1 tests
+#
+# Checking my_like_range_ucs2
+#
+SET NAMES utf8;
+CREATE TABLE t1 (
+ a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+ key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+# This one should scan only one row
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+# This one should scan many rows: 'c' is a contraction head
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+SELECT * FROM t1 WHERE a LIKE 'c%';
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+
+DROP TABLE t1;
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 2607e0f6d43..7ad25fb3565 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1498,6 +1498,14 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
}
}
+
+static inline my_wc_t
+ucs2_to_wc(const uchar *ptr)
+{
+ return (((uint) ptr[0]) << 8) + ptr[1];
+}
+
+
/*
** Calculate min_str and max_str that ranges a LIKE string.
** Arguments:
@@ -1531,6 +1539,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
; ptr+=2, charlen--)
{
+ my_wc_t wc;
if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
{
ptr+=2; /* Skip escape */
@@ -1567,9 +1576,9 @@ fill_max_and_min:
}
if (have_contractions && ptr + 3 < end &&
- ptr[0] == '\0' &&
- my_uca_can_be_contraction_head(cs, (uchar) ptr[1]))
+ my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr))))
{
+ my_wc_t wc2;
/* Contraction head found */
if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
{
@@ -1581,9 +1590,8 @@ fill_max_and_min:
Check if the second letter can be contraction part,
and if two letters really produce a contraction.
*/
- if (ptr[2] == '\0' &&
- my_uca_can_be_contraction_tail(cs, (uchar) ptr[3]) &&
- my_uca_contraction2_weight(cs,(uchar) ptr[1], (uchar) ptr[3]))
+ if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) &&
+ my_uca_contraction2_weight(cs, wc , wc2))
{
/* Contraction found */
if (charlen == 1 || min_str + 2 >= min_end)