Applied a patch from Alexander Barkov to fix some problems with the Croatian character set and LIKE queries

Author: Alexander Barkov. License: GPL. mysql-test/t/ctype_ucs.test: Added a test case for the Croatian character set.
author: Michael Widenius <monty@askmonty.org> 2009-12-03 14:02:37 +0200
committer: Michael Widenius <monty@askmonty.org> 2009-12-03 14:02:37 +0200
commit: 626dd5e81a87f2eefecc49f7a140708062d7fde4 (patch)
tree: 100655a3b097525c21bc6c8101634b71e408ee64
parent: 65ca2521f18b0b3400d8b45edae30baf2dbd6401 (diff)
download: mariadb-git-626dd5e81a87f2eefecc49f7a140708062d7fde4.tar.gz
3 files changed, 88 insertions, 5 deletions
diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
index 428629e7e9e..c14422470ca 100644
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -1211,3 +1211,47 @@ HEX(DAYNAME(19700101))
 0427043504420432043504400433
 SET character_set_connection=latin1;
 End of 5.0 tests
+Start of 5.1 tests
+SET NAMES utf8;
+CREATE TABLE t1 (
+a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	30	Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'c%';
+a
+ca
+cc
+cz
+ch
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+a
+ch
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+hex(concat('d',_ucs2 0x017E,'%'))
+0064017E0025
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
+1	SIMPLE	t1	range	a	a	23	NULL	1	Using where; Using index
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+hex(a)
+0064017E
+DROP TABLE t1;
diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
index e247110658b..be1ca63d9e9 100644
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -723,3 +723,34 @@ SELECT HEX(DAYNAME(19700101));
 SET character_set_connection=latin1;
 
 --echo End of 5.0 tests
+
+
+--echo Start of 5.1 tests
+#
+# Checking my_like_range_ucs2
+#
+SET NAMES utf8;
+CREATE TABLE t1 (
+  a varchar(10) CHARACTER SET ucs2 COLLATE ucs2_czech_ci,
+  key(a)
+);
+INSERT INTO t1 VALUES
+('aa'),('bb'),('cc'),('dd'),('ee'),('ff'),('gg'),('hh'),('ii'),
+('jj'),('kk'),('ll'),('mm'),('nn'),('oo'),('pp'),('rr'),('ss'),
+('tt'),('uu'),('vv'),('ww'),('xx'),('yy'),('zz');
+INSERT INTO t1 VALUES ('ca'),('cz'),('ch');
+INSERT INTO t1 VALUES ('da'),('dz'), (X'0064017E');
+# This one should scan only one row
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'b%';
+# This one should scan many rows: 'c' is a contraction head
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'c%';
+SELECT * FROM t1 WHERE a LIKE 'c%';
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'ch%';
+SELECT * FROM t1 WHERE a LIKE 'ch%';
+ALTER TABLE t1 MODIFY a VARCHAR(10) CHARACTER SET ucs2 COLLATE ucs2_croatian_ci;
+EXPLAIN SELECT * FROM t1 WHERE a LIKE 'd%';
+SELECT hex(concat('d',_ucs2 0x017E,'%'));
+EXPLAIN SELECT * FROM t1 WHERE a LIKE concat('d',_ucs2 0x017E,'%');
+SELECT hex(a) FROM t1 WHERE a LIKE concat('D',_ucs2 0x017E,'%');
+
+DROP TABLE t1;
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index 2607e0f6d43..7ad25fb3565 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1498,6 +1498,14 @@ void my_hash_sort_ucs2_bin(CHARSET_INFO *cs __attribute__((unused)),
   }
 }
 
+
+static inline my_wc_t
+ucs2_to_wc(const uchar *ptr)
+{
+  return (((uint) ptr[0]) << 8) + ptr[1];
+}
+
+
 /*
 ** Calculate min_str and max_str that ranges a LIKE string.
 ** Arguments:
@@ -1531,6 +1539,7 @@ my_bool my_like_range_ucs2(CHARSET_INFO *cs,
   for ( ; ptr + 1 < end && min_str + 1 < min_end && charlen > 0
         ; ptr+=2, charlen--)
   {
+    my_wc_t wc;
     if (ptr[0] == '\0' && ptr[1] == escape && ptr + 1 < end)
     {
       ptr+=2;					/* Skip escape */
@@ -1567,9 +1576,9 @@ fill_max_and_min:
     }
 
     if (have_contractions && ptr + 3 < end &&
-        ptr[0] == '\0' &&
-        my_uca_can_be_contraction_head(cs, (uchar) ptr[1]))
+        my_uca_can_be_contraction_head(cs, (wc= ucs2_to_wc((uchar*) ptr))))
     {
+      my_wc_t wc2;
       /* Contraction head found */
       if (ptr[2] == '\0' && (ptr[3] == w_one || ptr[3] == w_many))
       {
@@ -1581,9 +1590,8 @@ fill_max_and_min:
         Check if the second letter can be contraction part,
         and if two letters really produce a contraction.
       */
-      if (ptr[2] == '\0' &&
-          my_uca_can_be_contraction_tail(cs, (uchar) ptr[3]) &&
-          my_uca_contraction2_weight(cs,(uchar) ptr[1], (uchar) ptr[3]))
+      if (my_uca_can_be_contraction_tail(cs, (wc2= ucs2_to_wc((uchar*) ptr + 2))) &&
+          my_uca_contraction2_weight(cs, wc , wc2))
       {
         /* Contraction found */
         if (charlen == 1 || min_str + 2 >= min_end)
author	Michael Widenius <monty@askmonty.org>	2009-12-03 14:02:37 +0200
committer	Michael Widenius <monty@askmonty.org>	2009-12-03 14:02:37 +0200
commit	626dd5e81a87f2eefecc49f7a140708062d7fde4 (patch)
tree	100655a3b097525c21bc6c8101634b71e408ee64
parent	65ca2521f18b0b3400d8b45edae30baf2dbd6401 (diff)
download	mariadb-git-626dd5e81a87f2eefecc49f7a140708062d7fde4.tar.gz