summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorunknown <bar@bar.intranet.mysql.r18.ru>2004-03-26 18:14:39 +0400
committerunknown <bar@bar.intranet.mysql.r18.ru>2004-03-26 18:14:39 +0400
commit63e1d22f8f46966c13d88a4f2e9acd7fa3e9c9b6 (patch)
tree0473522f9282da1daba81a8db274d031c52e0fe8
parent403948cbb3a27da905269857616c60c8fc4675ba (diff)
downloadmariadb-git-63e1d22f8f46966c13d88a4f2e9acd7fa3e9c9b6.tar.gz
UTF8 now process space as PAD character correctly.
-rw-r--r--mysql-test/r/compare.result24
-rw-r--r--mysql-test/r/ctype_utf8.result24
-rw-r--r--mysql-test/t/compare.test17
-rw-r--r--mysql-test/t/ctype_utf8.test17
-rw-r--r--strings/ctype-utf8.c96
5 files changed, 170 insertions, 8 deletions
diff --git a/mysql-test/r/compare.result b/mysql-test/r/compare.result
index 3ad3465fac0..bf8a5106044 100644
--- a/mysql-test/r/compare.result
+++ b/mysql-test/r/compare.result
@@ -12,3 +12,27 @@ select * from t1;
id
000000000001
drop table t1;
+SELECT 'a' = 'a ';
+'a' = 'a '
+1
+SELECT 'a\0' < 'a';
+'a\0' < 'a'
+1
+SELECT 'a\0' < 'a ';
+'a\0' < 'a '
+1
+SELECT 'a\t' < 'a';
+'a\t' < 'a'
+1
+SELECT 'a\t' < 'a ';
+'a\t' < 'a '
+1
+CREATE TABLE t1 (a char(10) not null);
+INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
+SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
+hex(a) STRCMP(a,'a') STRCMP(a,'a ')
+61 0 0
+6100 -1 -1
+6109 -1 -1
+61 0 0
+DROP TABLE t1;
diff --git a/mysql-test/r/ctype_utf8.result b/mysql-test/r/ctype_utf8.result
index 33a7e91fbeb..28af71b7681 100644
--- a/mysql-test/r/ctype_utf8.result
+++ b/mysql-test/r/ctype_utf8.result
@@ -63,6 +63,30 @@ select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
_utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%')
1
+SELECT 'a' = 'a ';
+'a' = 'a '
+1
+SELECT 'a\0' < 'a';
+'a\0' < 'a'
+1
+SELECT 'a\0' < 'a ';
+'a\0' < 'a '
+1
+SELECT 'a\t' < 'a';
+'a\t' < 'a'
+1
+SELECT 'a\t' < 'a ';
+'a\t' < 'a '
+1
+CREATE TABLE t1 (a char(10) character set utf8 not null);
+INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
+SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
+hex(a) STRCMP(a,'a') STRCMP(a,'a ')
+61 0 0
+6100 -1 -1
+6109 -1 -1
+61 0 0
+DROP TABLE t1;
select insert('txs',2,1,'hi'),insert('is ',4,0,'a'),insert('txxxxt',2,4,'es');
insert('txs',2,1,'hi') insert('is ',4,0,'a') insert('txxxxt',2,4,'es')
this is a test
diff --git a/mysql-test/t/compare.test b/mysql-test/t/compare.test
index e5a2e310866..b0cef48dd3f 100644
--- a/mysql-test/t/compare.test
+++ b/mysql-test/t/compare.test
@@ -13,3 +13,20 @@ select * from t1 where id=000000000001;
delete from t1 where id=000000000002;
select * from t1;
drop table t1;
+
+#
+# Check the following:
+# "a" == "a "
+# "a\0" < "a"
+# "a\0" < "a "
+
+SELECT 'a' = 'a ';
+SELECT 'a\0' < 'a';
+SELECT 'a\0' < 'a ';
+SELECT 'a\t' < 'a';
+SELECT 'a\t' < 'a ';
+
+CREATE TABLE t1 (a char(10) not null);
+INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
+SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
+DROP TABLE t1;
diff --git a/mysql-test/t/ctype_utf8.test b/mysql-test/t/ctype_utf8.test
index 964ac84fbbe..4e68efeffc3 100644
--- a/mysql-test/t/ctype_utf8.test
+++ b/mysql-test/t/ctype_utf8.test
@@ -34,6 +34,23 @@ select 'A' like 'a' collate utf8_bin;
select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD0B1,_utf8 '%');
#
+# Check the following:
+# "a" == "a "
+# "a\0" < "a"
+# "a\0" < "a "
+
+SELECT 'a' = 'a ';
+SELECT 'a\0' < 'a';
+SELECT 'a\0' < 'a ';
+SELECT 'a\t' < 'a';
+SELECT 'a\t' < 'a ';
+
+CREATE TABLE t1 (a char(10) character set utf8 not null);
+INSERT INTO t1 VALUES ('a'),('a\0'),('a\t'),('a ');
+SELECT hex(a),STRCMP(a,'a'), STRCMP(a,'a ') FROM t1;
+DROP TABLE t1;
+
+#
# Fix this, it should return 1:
#
#select _utf8 0xD0B0D0B1D0B2 like concat(_utf8'%',_utf8 0xD091,_utf8 '%');
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index 886ecfbd0c9..82787f2b65f 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -1837,18 +1837,98 @@ static int my_strnncoll_utf8(CHARSET_INFO *cs,
}
+
/*
- TODO: Has to be fixed as strnncollsp in ctype-simple
+ Compare strings, discarding end space
+
+ SYNOPSIS
+ my_strnncollsp_utf8()
+ cs character set handler
+ a First string to compare
+ a_length Length of 'a'
+ b Second string to compare
+ b_length Length of 'b'
+
+ IMPLEMENTATION
+ If one string is shorter as the other, then we space extend the other
+ so that the strings have equal length.
+
+ This will ensure that the following things hold:
+
+ "a" == "a "
+ "a\0" < "a"
+ "a\0" < "a "
+
+ RETURN
+ < 0 a < b
+ = 0 a == b
+ > 0 a > b
*/
-static
-int my_strnncollsp_utf8(CHARSET_INFO * cs,
- const uchar *s, uint slen,
- const uchar *t, uint tlen)
+static int my_strnncollsp_utf8(CHARSET_INFO *cs,
+ const uchar *s, uint slen,
+ const uchar *t, uint tlen)
{
- for ( ; slen && s[slen-1] == ' ' ; slen--);
- for ( ; tlen && t[tlen-1] == ' ' ; tlen--);
- return my_strnncoll_utf8(cs,s,slen,t,tlen);
+ int s_res,t_res;
+ my_wc_t s_wc,t_wc;
+ const uchar *se= s+slen;
+ const uchar *te= t+tlen;
+
+ while ( s < se && t < te )
+ {
+ int plane;
+ s_res=my_utf8_uni(cs,&s_wc, s, se);
+ t_res=my_utf8_uni(cs,&t_wc, t, te);
+
+ if ( s_res <= 0 || t_res <= 0 )
+ {
+ /* Incorrect string, compare by char value */
+ return ((int)s[0]-(int)t[0]);
+ }
+
+ plane=(s_wc>>8) & 0xFF;
+ s_wc = uni_plane[plane] ? uni_plane[plane][s_wc & 0xFF].sort : s_wc;
+ plane=(t_wc>>8) & 0xFF;
+ t_wc = uni_plane[plane] ? uni_plane[plane][t_wc & 0xFF].sort : t_wc;
+ if ( s_wc != t_wc )
+ {
+ return ((int) s_wc) - ((int) t_wc);
+ }
+
+ s+=s_res;
+ t+=t_res;
+ }
+
+ slen= se-s;
+ tlen= te-t;
+
+ if (slen != tlen)
+ {
+ int swap= 0;
+ if (slen < tlen)
+ {
+ slen= tlen;
+ s= t;
+ se= te;
+ swap= -1;
+ }
+ /*
+ This following loop uses the fact that in UTF-8
+ all multibyte characters are greater than space,
+ and all multibyte head characters are greater than
+ space. It means if we meet a character greater
+ than space, it always means that the longer string
+ is greater. So we can reuse the same loop from the
+ 8bit version, without having to process full multibute
+ sequences.
+ */
+ for ( ; s < se; s++)
+ {
+ if (*s != ' ')
+ return ((int)*s - (int) ' ') ^ swap;
+ }
+ }
+ return 0;
}