summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author unknown <bar@bar.intranet.mysql.r18.ru> 2004-01-19 19:16:30 +0400
committer unknown <bar@bar.intranet.mysql.r18.ru> 2004-01-19 19:16:30 +0400
commit 162f1dc5e6f8cd24ec996aa701a50bf20b8f6a36 (patch)
tree f567782e8ab2357e9f089387fcb0d454a6a02ce3
parent d89fd8281bb6795da5d897561fb19e19a631ce0e (diff)
download mariadb-git-162f1dc5e6f8cd24ec996aa701a50bf20b8f6a36.tar.gz
UCS-2 aligning 0xAA -> 0x00AA
-rw-r--r-- include/m_ctype.h 3
-rw-r--r-- mysql-test/r/ctype_ucs.result 48
-rw-r--r-- mysql-test/t/ctype_ucs.test 21
-rw-r--r-- mysys/charset.c 4
-rw-r--r-- sql/item.h 4
-rw-r--r-- sql/sql_string.cc 46
-rw-r--r-- sql/sql_string.h 1
-rw-r--r-- strings/ctype-big5.c 2
-rw-r--r-- strings/ctype-bin.c 1
-rw-r--r-- strings/ctype-czech.c 1
-rw-r--r-- strings/ctype-euc_kr.c 2
-rw-r--r-- strings/ctype-extra.c 1
-rw-r--r-- strings/ctype-gb2312.c 2
-rw-r--r-- strings/ctype-gbk.c 2
-rw-r--r-- strings/ctype-latin1.c 3
-rw-r--r-- strings/ctype-sjis.c 2
-rw-r--r-- strings/ctype-tis620.c 2
-rw-r--r-- strings/ctype-ucs2.c 2
-rw-r--r-- strings/ctype-ujis.c 2
-rw-r--r-- strings/ctype-utf8.c 2
-rw-r--r-- strings/ctype-win1250ch.c 1
21 files changed, 148 insertions, 4 deletions
diff --git a/include/m_ctype.h b/include/m_ctype.h
index 0228b359111..4a9415f43f9 100644
--- a/include/m_ctype.h
+++ b/include/m_ctype.h
@@ -206,8 +206,9 @@ typedef struct charset_info_st
uchar state_map[256];
uchar ident_map[256];
uint strxfrm_multiply;
+ uint mbminlen;
uint mbmaxlen;
- char max_sort_char; /* For LIKE optimization */
+ char max_sort_char; /* For LIKE optimization */
MY_CHARSET_HANDLER *cset;
MY_COLLATION_HANDLER *coll;
diff --git a/mysql-test/r/ctype_ucs.result b/mysql-test/r/ctype_ucs.result
index 58761526150..d6e9cc690a2 100644
--- a/mysql-test/r/ctype_ucs.result
+++ b/mysql-test/r/ctype_ucs.result
@@ -276,3 +276,51 @@ aardvara
aardvark
aardvarz
DROP TABLE t1;
+SELECT HEX(_ucs2 0x0);
+HEX(_ucs2 0x0)
+0000
+SELECT HEX(_ucs2 0x01);
+HEX(_ucs2 0x01)
+0001
+SELECT HEX(_ucs2 0x012);
+HEX(_ucs2 0x012)
+0012
+SELECT HEX(_ucs2 0x0123);
+HEX(_ucs2 0x0123)
+0123
+SELECT HEX(_ucs2 0x01234);
+HEX(_ucs2 0x01234)
+00001234
+SELECT HEX(_ucs2 0x012345);
+HEX(_ucs2 0x012345)
+00012345
+SELECT HEX(_ucs2 0x0123456);
+HEX(_ucs2 0x0123456)
+00123456
+SELECT HEX(_ucs2 0x01234567);
+HEX(_ucs2 0x01234567)
+01234567
+SELECT HEX(_ucs2 0x012345678);
+HEX(_ucs2 0x012345678)
+000012345678
+SELECT HEX(_ucs2 0x0123456789);
+HEX(_ucs2 0x0123456789)
+000123456789
+SELECT HEX(_ucs2 0x0123456789A);
+HEX(_ucs2 0x0123456789A)
+00123456789A
+SELECT HEX(_ucs2 0x0123456789AB);
+HEX(_ucs2 0x0123456789AB)
+0123456789AB
+SELECT HEX(_ucs2 0x0123456789ABC);
+HEX(_ucs2 0x0123456789ABC)
+0000123456789ABC
+SELECT HEX(_ucs2 0x0123456789ABCD);
+HEX(_ucs2 0x0123456789ABCD)
+000123456789ABCD
+SELECT HEX(_ucs2 0x0123456789ABCDE);
+HEX(_ucs2 0x0123456789ABCDE)
+00123456789ABCDE
+SELECT HEX(_ucs2 0x0123456789ABCDEF);
+HEX(_ucs2 0x0123456789ABCDEF)
+0123456789ABCDEF
diff --git a/mysql-test/t/ctype_ucs.test b/mysql-test/t/ctype_ucs.test
index 7eec58563b3..fd2a1b1cd7d 100644
--- a/mysql-test/t/ctype_ucs.test
+++ b/mysql-test/t/ctype_ucs.test
@@ -197,3 +197,24 @@ DROP TABLE t1;
# END OF Bug 1264 test
#
########################################################
+
+
+# Bug #2390
+# Check alignment
+#
+SELECT HEX(_ucs2 0x0);
+SELECT HEX(_ucs2 0x01);
+SELECT HEX(_ucs2 0x012);
+SELECT HEX(_ucs2 0x0123);
+SELECT HEX(_ucs2 0x01234);
+SELECT HEX(_ucs2 0x012345);
+SELECT HEX(_ucs2 0x0123456);
+SELECT HEX(_ucs2 0x01234567);
+SELECT HEX(_ucs2 0x012345678);
+SELECT HEX(_ucs2 0x0123456789);
+SELECT HEX(_ucs2 0x0123456789A);
+SELECT HEX(_ucs2 0x0123456789AB);
+SELECT HEX(_ucs2 0x0123456789ABC);
+SELECT HEX(_ucs2 0x0123456789ABCD);
+SELECT HEX(_ucs2 0x0123456789ABCDE);
+SELECT HEX(_ucs2 0x0123456789ABCDEF);
diff --git a/mysys/charset.c b/mysys/charset.c
index 5e9e3c3fcaa..40a026f161f 100644
--- a/mysys/charset.c
+++ b/mysys/charset.c
@@ -131,7 +131,8 @@ static void simple_cs_init_functions(CHARSET_INFO *cs)
cs->coll= &my_collation_8bit_simple_ci_handler;
cs->cset= &my_charset_8bit_handler;
- cs->mbmaxlen = 1;
+ cs->mbminlen= 1;
+ cs->mbmaxlen= 1;
}
@@ -273,6 +274,7 @@ static int simple_cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
if (create_fromuni(to))
goto err;
}
+ to->mbminlen= 1;
to->mbmaxlen= 1;
return 0;
diff --git a/sql/item.h b/sql/item.h
index 5def1e2b710..e6ed8109534 100644
--- a/sql/item.h
+++ b/sql/item.h
@@ -477,7 +477,7 @@ public:
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
{
collation.set(cs, dv);
- str_value.set(str,length,cs);
+ str_value.set_or_copy_aligned(str,length,cs);
/*
We have to have a different max_length than 'length' here to
ensure that we get the right length if we do use the item
@@ -493,7 +493,7 @@ public:
CHARSET_INFO *cs, Derivation dv= DERIVATION_COERCIBLE)
{
collation.set(cs, dv);
- str_value.set(str,length,cs);
+ str_value.set_or_copy_aligned(str,length,cs);
max_length= str_value.numchars()*cs->mbmaxlen;
set_name(name_par,0,cs);
decimals=NOT_FIXED_DEC;
diff --git a/sql/sql_string.cc b/sql/sql_string.cc
index 89f48607969..9534c5605fe 100644
--- a/sql/sql_string.cc
+++ b/sql/sql_string.cc
@@ -228,6 +228,52 @@ bool String::copy(const char *str,uint32 arg_length, CHARSET_INFO *cs)
return FALSE;
}
+/*
+** For real multi-byte, ascii incompatible character sets,
+** like UCS-2, add leading zeros if we have an incomplete character.
+** Thus, SELECT _ucs2 0xAA is automatically converted into
+** SELECT _ucs2 0x00AA. When arg_length is already a multiple of
+** cs->mbminlen, str is handed to set() unchanged and nothing is copied.
+** Returns FALSE on success, TRUE if alloc() fails.
+*/
+
+bool String::set_or_copy_aligned(const char *str,uint32 arg_length,
+ CHARSET_INFO *cs)
+{
+ /* How many bytes are in incomplete character */
+ uint32 offs= (arg_length % cs->mbminlen);
+
+ if (!offs) /* All characters are complete, just copy */
+ {
+ set(str, arg_length, cs);
+ return FALSE;
+ }
+
+ offs= cs->mbminlen - offs; /* Zeros to prepend to reach mbminlen boundary */
+ uint32 aligned_length= arg_length + offs;
+ if (alloc(aligned_length))
+ return TRUE;
+
+ /*
+ Probably this condition is not really necessary
+ because if aligned_length is 0 then offs is 0 too
+ and we'll return after calling set().
+ */
+ if ((str_length= aligned_length))
+ {
+ /*
+ Note, this is only safe for big-endian UCS-2.
+ If we add little-endian UCS-2 sometimes, this code
+ will be more complicated. But it's OK for now.
+ */
+ bzero((char*)Ptr, offs);
+ memcpy(Ptr + offs, str, arg_length);
+ }
+ Ptr[aligned_length]=0;
+ str_charset=cs;
+ return FALSE;
+}
+
/* Copy with charset convertion */
bool String::copy(const char *str, uint32 arg_length,
diff --git a/sql/sql_string.h b/sql/sql_string.h
index 325611737ca..8817aa8eab8 100644
--- a/sql/sql_string.h
+++ b/sql/sql_string.h
@@ -183,6 +183,7 @@ public:
bool copy(); // Alloc string if not alloced
bool copy(const String &s); // Allocate new string
bool copy(const char *s,uint32 arg_length, CHARSET_INFO *cs); // Allocate new string
+ bool set_or_copy_aligned(const char *s, uint32 arg_length, CHARSET_INFO *cs);
bool copy(const char*s,uint32 arg_length, CHARSET_INFO *csfrom,
CHARSET_INFO *csto);
bool append(const String &s);
diff --git a/strings/ctype-big5.c b/strings/ctype-big5.c
index c5ddc167d0d..8d4081fb2aa 100644
--- a/strings/ctype-big5.c
+++ b/strings/ctype-big5.c
@@ -6281,6 +6281,7 @@ CHARSET_INFO my_charset_big5_chinese_ci=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_big5_handler,
@@ -6304,6 +6305,7 @@ CHARSET_INFO my_charset_big5_bin=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_big5_handler,
diff --git a/strings/ctype-bin.c b/strings/ctype-bin.c
index 6f28c43b2c6..67435b7df6c 100644
--- a/strings/ctype-bin.c
+++ b/strings/ctype-bin.c
@@ -381,6 +381,7 @@ CHARSET_INFO my_charset_bin =
NULL, /* tab_from_uni */
"","",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
1, /* mbmaxlen */
(char) 255, /* max_sort_char */
&my_charset_handler,
diff --git a/strings/ctype-czech.c b/strings/ctype-czech.c
index b2e4f1886ed..1a07a5eba7e 100644
--- a/strings/ctype-czech.c
+++ b/strings/ctype-czech.c
@@ -631,6 +631,7 @@ CHARSET_INFO my_charset_latin2_czech_ci =
idx_uni_8859_2, /* tab_from_uni */
"","",
4, /* strxfrm_multiply */
+ 1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_8bit_handler,
diff --git a/strings/ctype-euc_kr.c b/strings/ctype-euc_kr.c
index addd7803680..366a5d500ed 100644
--- a/strings/ctype-euc_kr.c
+++ b/strings/ctype-euc_kr.c
@@ -8689,6 +8689,7 @@ CHARSET_INFO my_charset_euckr_korean_ci=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
@@ -8712,6 +8713,7 @@ CHARSET_INFO my_charset_euckr_bin=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
diff --git a/strings/ctype-extra.c b/strings/ctype-extra.c
index 55bfa09ea5f..0085d264416 100644
--- a/strings/ctype-extra.c
+++ b/strings/ctype-extra.c
@@ -34,6 +34,7 @@ CHARSET_INFO compiled_charsets[] = {
0,
0,
0,
+ 0,
NULL,
NULL
}
diff --git a/strings/ctype-gb2312.c b/strings/ctype-gb2312.c
index b84ddc9081b..44a58b2b906 100644
--- a/strings/ctype-gb2312.c
+++ b/strings/ctype-gb2312.c
@@ -5740,6 +5740,7 @@ CHARSET_INFO my_charset_gb2312_chinese_ci=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
@@ -5762,6 +5763,7 @@ CHARSET_INFO my_charset_gb2312_bin=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
diff --git a/strings/ctype-gbk.c b/strings/ctype-gbk.c
index 585dc66be4c..5475c3bd363 100644
--- a/strings/ctype-gbk.c
+++ b/strings/ctype-gbk.c
@@ -9936,6 +9936,7 @@ CHARSET_INFO my_charset_gbk_chinese_ci=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
@@ -9958,6 +9959,7 @@ CHARSET_INFO my_charset_gbk_bin=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
diff --git a/strings/ctype-latin1.c b/strings/ctype-latin1.c
index 15798abb85b..c00ded21575 100644
--- a/strings/ctype-latin1.c
+++ b/strings/ctype-latin1.c
@@ -215,6 +215,7 @@ CHARSET_INFO my_charset_latin1=
NULL, /* tab_from_uni */
"","",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,
@@ -410,6 +411,7 @@ CHARSET_INFO my_charset_latin1_german2_ci=
NULL, /* tab_from_uni */
"","",
2, /* strxfrm_multiply */
+ 1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,
@@ -433,6 +435,7 @@ CHARSET_INFO my_charset_latin1_bin=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,
diff --git a/strings/ctype-sjis.c b/strings/ctype-sjis.c
index a84fbd16e5d..42f32fe739b 100644
--- a/strings/ctype-sjis.c
+++ b/strings/ctype-sjis.c
@@ -4525,6 +4525,7 @@ CHARSET_INFO my_charset_sjis_japanese_ci=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
@@ -4547,6 +4548,7 @@ CHARSET_INFO my_charset_sjis_bin=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_handler,
diff --git a/strings/ctype-tis620.c b/strings/ctype-tis620.c
index 92b2eeb25e0..09552a0dc23 100644
--- a/strings/ctype-tis620.c
+++ b/strings/ctype-tis620.c
@@ -956,6 +956,7 @@ CHARSET_INFO my_charset_tis620_thai_ci=
"",
"",
4, /* strxfrm_multiply */
+ 1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,
@@ -978,6 +979,7 @@ CHARSET_INFO my_charset_tis620_bin=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_handler,
diff --git a/strings/ctype-ucs2.c b/strings/ctype-ucs2.c
index beb803a69f2..a7a59fc50f7 100644
--- a/strings/ctype-ucs2.c
+++ b/strings/ctype-ucs2.c
@@ -1322,6 +1322,7 @@ CHARSET_INFO my_charset_ucs2_general_ci=
"",
"",
1, /* strxfrm_multiply */
+ 2, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_ucs2_handler,
@@ -1345,6 +1346,7 @@ CHARSET_INFO my_charset_ucs2_bin=
"",
"",
1, /* strxfrm_multiply */
+ 2, /* mbminlen */
2, /* mbmaxlen */
0,
&my_charset_ucs2_handler,
diff --git a/strings/ctype-ujis.c b/strings/ctype-ujis.c
index 2815b70351b..f6928e9426e 100644
--- a/strings/ctype-ujis.c
+++ b/strings/ctype-ujis.c
@@ -8480,6 +8480,7 @@ CHARSET_INFO my_charset_ujis_japanese_ci=
NULL, /* tab_from_uni */
"","",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
3, /* mbmaxlen */
0,
&my_charset_handler,
@@ -8502,6 +8503,7 @@ CHARSET_INFO my_charset_ujis_bin=
NULL, /* tab_from_uni */
"","",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
3, /* mbmaxlen */
0,
&my_charset_handler,
diff --git a/strings/ctype-utf8.c b/strings/ctype-utf8.c
index b5716c53ea2..8004fba75b7 100644
--- a/strings/ctype-utf8.c
+++ b/strings/ctype-utf8.c
@@ -2006,6 +2006,7 @@ CHARSET_INFO my_charset_utf8_general_ci=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
3, /* mbmaxlen */
0,
&my_charset_handler,
@@ -2029,6 +2030,7 @@ CHARSET_INFO my_charset_utf8_bin=
"",
"",
1, /* strxfrm_multiply */
+ 1, /* mbminlen */
3, /* mbmaxlen */
0,
&my_charset_handler,
diff --git a/strings/ctype-win1250ch.c b/strings/ctype-win1250ch.c
index 60a5737009f..d3b5c9d1796 100644
--- a/strings/ctype-win1250ch.c
+++ b/strings/ctype-win1250ch.c
@@ -671,6 +671,7 @@ CHARSET_INFO my_charset_cp1250_czech_ci =
idx_uni_cp1250, /* tab_from_uni */
"","",
2, /* strxfrm_multiply */
+ 1, /* mbminlen */
1, /* mbmaxlen */
0,
&my_charset_8bit_handler,